diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index 9472c5f88ad720..b5a43f5057a47b 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -197,12 +197,12 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64) add_definitions(-DUNIX_AMD64_ABI_ITF) endif (CLR_CMAKE_TARGET_ARCH_AMD64) add_definitions(-DFEATURE_USE_ASM_GC_WRITE_BARRIERS) -if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) -endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) -if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) +if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES) -endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) if(NOT CLR_CMAKE_TARGET_UNIX) add_definitions(-DFEATURE_WIN32_REGISTRY) @@ -275,6 +275,10 @@ function(set_target_definitions_to_custom_os_and_arch) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MULTIREG_RETURN) elseif((TARGETDETAILS_ARCH STREQUAL "arm") OR (TARGETDETAILS_ARCH STREQUAL "armel")) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_ARM) + elseif((TARGETDETAILS_ARCH STREQUAL "riscv64")) + target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_64BIT) + target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_RISCV64) + target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MULTIREG_RETURN) endif() if (TARGETDETAILS_ARCH STREQUAL "armel") diff --git a/src/coreclr/debug/createdump/createdumpunix.cpp b/src/coreclr/debug/createdump/createdumpunix.cpp index f86a283546a356..86c63e9482037a 100644 --- a/src/coreclr/debug/createdump/createdumpunix.cpp +++ b/src/coreclr/debug/createdump/createdumpunix.cpp @@ -3,7 +3,7 @@ #include "createdump.h" -#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) +#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) || defined(__riscv) long g_pageSize = 0; #endif @@ -19,7 +19,7 @@ CreateDump(const char* dumpPathTemplate, int pid, const char* dumpType, MINIDUMP bool result = false; // Initialize PAGE_SIZE -#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) +#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) || defined(__riscv) g_pageSize = sysconf(_SC_PAGESIZE); #endif TRACE("PAGE_SIZE %d\n", PAGE_SIZE); diff --git a/src/coreclr/debug/createdump/datatarget.cpp b/src/coreclr/debug/createdump/datatarget.cpp index 7ef154e034c8e2..0bf0ed23c66c5c 100644 --- a/src/coreclr/debug/createdump/datatarget.cpp +++ b/src/coreclr/debug/createdump/datatarget.cpp @@ -77,6 +77,8 @@ DumpDataTarget::GetMachineType( *machine = IMAGE_FILE_MACHINE_I386; #elif HOST_LOONGARCH64 *machine = IMAGE_FILE_MACHINE_LOONGARCH64; +#elif HOST_RISCV64 + *machine = IMAGE_FILE_MACHINE_RISCV64; #else #error Unsupported 
architecture
 #endif
@@ -87,7 +89,7 @@ HRESULT STDMETHODCALLTYPE
 DumpDataTarget::GetPointerSize(
     /* [out] */ ULONG32 *size)
 {
-#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64)
+#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64)
     *size = 8;
 #elif defined(HOST_ARM) || defined(HOST_X86)
     *size = 4;
diff --git a/src/coreclr/debug/createdump/dumpwriterelf.h b/src/coreclr/debug/createdump/dumpwriterelf.h
index cb8731871de211..874df9cc791fcc 100644
--- a/src/coreclr/debug/createdump/dumpwriterelf.h
+++ b/src/coreclr/debug/createdump/dumpwriterelf.h
@@ -23,6 +23,8 @@
 #define ELF_ARCH EM_ARM
 #elif defined(__loongarch64)
 #define ELF_ARCH EM_LOONGARCH
+#elif defined(__riscv)
+#define ELF_ARCH EM_RISCV
 #endif
 #define PH_HDR_CANARY 0xFFFF
diff --git a/src/coreclr/debug/createdump/memoryregion.h b/src/coreclr/debug/createdump/memoryregion.h
index f4c115d539b476..e7b8f31a0b2692 100644
--- a/src/coreclr/debug/createdump/memoryregion.h
+++ b/src/coreclr/debug/createdump/memoryregion.h
@@ -1,7 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
-#if !defined(PAGE_SIZE) && (defined(__arm__) || defined(__aarch64__) || defined(__loongarch64))
+#if !defined(PAGE_SIZE) && (defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) || defined(__riscv))
 extern long g_pageSize;
 #define PAGE_SIZE g_pageSize
 #endif
diff --git a/src/coreclr/debug/createdump/threadinfo.h b/src/coreclr/debug/createdump/threadinfo.h
index ed82c1ec51a652..96fabbe6938dec 100644
--- a/src/coreclr/debug/createdump/threadinfo.h
+++ b/src/coreclr/debug/createdump/threadinfo.h
@@ -20,6 +20,14 @@ class CrashInfo;
 #define MCREG_Pc(mc) ((mc).pc)
 #endif
+#if defined(__riscv)
+// See src/coreclr/pal/src/include/pal/context.h
+#define MCREG_Ra(mc) ((mc).ra)
+#define MCREG_Fp(mc) ((mc).s0)
+#define MCREG_Sp(mc) ((mc).sp)
+#define MCREG_Pc(mc) ((mc).pc)
+#endif
+
 #define FPREG_ErrorOffset(fpregs) *(DWORD*)&((fpregs).rip)
 #define FPREG_ErrorSelector(fpregs) *(((WORD*)&((fpregs).rip)) + 2)
 #define FPREG_DataOffset(fpregs) *(DWORD*)&((fpregs).rdp)
@@ -30,6 +38,12 @@ class CrashInfo;
 #elif defined(__loongarch64)
 // struct user_regs_struct {} defined `/usr/include/loongarch64-linux-gnu/sys/user.h`
+struct user_fpregs_struct
+{
+    unsigned long long fpregs[32];
+    unsigned long fpscr;
+} __attribute__((__packed__));
+#elif defined(__riscv)
 struct user_fpregs_struct
 {
     unsigned long long fpregs[32];
@@ -154,6 +168,10 @@ class ThreadInfo
     inline const uint64_t GetInstructionPointer() const { return m_gpRegisters.ARM_pc; }
     inline const uint64_t GetStackPointer() const { return m_gpRegisters.ARM_sp; }
     inline const uint64_t GetFramePointer() const { return m_gpRegisters.ARM_fp; }
+#elif defined(__riscv)
+    inline const uint64_t GetInstructionPointer() const { return MCREG_Pc(m_gpRegisters); }
+    inline const uint64_t GetStackPointer() const { return MCREG_Sp(m_gpRegisters); }
+    inline const uint64_t GetFramePointer() const { return MCREG_Fp(m_gpRegisters); }
 #endif
 #endif // __APPLE__
diff --git a/src/coreclr/debug/createdump/threadinfounix.cpp b/src/coreclr/debug/createdump/threadinfounix.cpp
index c9ccc7f37d9a67..ca75a6128fee9e 100644
--- a/src/coreclr/debug/createdump/threadinfounix.cpp
+++ b/src/coreclr/debug/createdump/threadinfounix.cpp
@@ -59,6 +59,8 @@ ThreadInfo::Initialize()
     TRACE("Thread %04x RIP %016llx RSP %016llx\n", m_tid, (unsigned long long)m_gpRegisters.rip, (unsigned long
long)m_gpRegisters.rsp); #elif defined(__loongarch64) TRACE("Thread %04x PC %016llx SP %016llx\n", m_tid, (unsigned long long)m_gpRegisters.pc, (unsigned long long)m_gpRegisters.gpr[3]); +#elif defined(__riscv) + TRACE("Thread %04x PC %016llx SP %016llx\n", m_tid, (unsigned long long)m_gpRegisters.pc, (unsigned long long)m_gpRegisters.sp); #else #error "Unsupported architecture" #endif @@ -243,6 +245,8 @@ ThreadInfo::GetThreadContext(uint32_t flags, CONTEXT* context) const memcpy(context->F, m_fpRegisters.fpregs, sizeof(context->F)); context->Fcsr = m_fpRegisters.fpscr; } +#elif defined(__riscv) + _ASSERTE(!"TODO RISCV64 NYI"); #else #error Platform not supported #endif diff --git a/src/coreclr/debug/daccess/daccess.cpp b/src/coreclr/debug/daccess/daccess.cpp index 3bc22eaae78f29..4baa2c6fabbf5c 100644 --- a/src/coreclr/debug/daccess/daccess.cpp +++ b/src/coreclr/debug/daccess/daccess.cpp @@ -5446,6 +5446,8 @@ ClrDataAccess::Initialize(void) CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_ARM64; #elif defined(TARGET_LOONGARCH64) CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_LOONGARCH64; + #elif defined(TARGET_RISCV64) + CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_RISCV64; #else #error Unknown Processor. #endif diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp index 08e9b8265829bf..fb7955225587f0 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -559,6 +559,18 @@ ClrDataAccess::GetRegisterName(int regNum, unsigned int count, _Inout_updates_z_ W("S6"), W("S7"), W("K0"), W("K1"), W("GP"), W("SP"), W("FP"), W("RA") }; +#elif defined(TARGET_RISCV64) + static const WCHAR *regs[] = + { + W("R0"), W("RA"), W("SP"), W("GP"), + W("TP"), W("T0"), W("T1"), W("T2"), + W("FP"), W("S1"), W("A0"), W("A1"), + W("A2"), W("A3"), W("A4"), W("A5"), + W("A6"), W("A7"), W("S2"), W("S3"), + W("S4"), W("S5"), W("S6"), W("S7"), + W("S8"), W("S9"), W("S10"), W("S11"), + W("T3"), W("T4"), W("T5"), W("T6") + }; #endif // Caller frame registers are encoded as "-(reg+1)". 
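For reference, the register-name table just added to ClrDataAccess::GetRegisterName follows the standard RISC-V integer register numbering x0..x31, with x2 = SP and x8 = FP (s0). A minimal standalone sketch of that assumption, which later hunks rely on (DBG_TARGET_REGNUM_SP == 2 in dbgipcevents.h and the regNum == 2 || 8 asserts in gcinfoencoder.cpp); the riscvRegNames array and main() wrapper are illustrative only and not part of the patch:

#include <cassert>
#include <cstring>

// Same ordering as the regs[] table above (array index == RISC-V register number).
static const char* const riscvRegNames[] = {
    "R0", "RA", "SP",  "GP",  "TP", "T0", "T1", "T2",
    "FP", "S1", "A0",  "A1",  "A2", "A3", "A4", "A5",
    "A6", "A7", "S2",  "S3",  "S4", "S5", "S6", "S7",
    "S8", "S9", "S10", "S11", "T3", "T4", "T5", "T6"
};

int main()
{
    assert(strcmp(riscvRegNames[2], "SP") == 0);  // x2 is the stack pointer
    assert(strcmp(riscvRegNames[8], "FP") == 0);  // x8 (s0) is the frame pointer
    return 0;
}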
diff --git a/src/coreclr/debug/di/CMakeLists.txt b/src/coreclr/debug/di/CMakeLists.txt index 9d84f90b1a5c6a..b5f6872b8f690a 100644 --- a/src/coreclr/debug/di/CMakeLists.txt +++ b/src/coreclr/debug/di/CMakeLists.txt @@ -66,7 +66,7 @@ if(CLR_CMAKE_HOST_WIN32) endif() elseif(CLR_CMAKE_HOST_UNIX) - if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) set(CORDBDI_SOURCES_ASM_FILE ${ARCH_SOURCES_DIR}/floatconversion.S ) diff --git a/src/coreclr/debug/di/module.cpp b/src/coreclr/debug/di/module.cpp index d62659291ec7e1..15fae1581e2bff 100644 --- a/src/coreclr/debug/di/module.cpp +++ b/src/coreclr/debug/di/module.cpp @@ -4872,6 +4872,8 @@ int CordbNativeCode::GetCallInstructionLength(BYTE *ip, ULONG32 count) _ASSERTE(!"Invalid opcode!"); return -1; +#elif defined(TARGET_RISCV64) + return MAX_INSTRUCTION_LENGTH; #else #error Platform not implemented #endif diff --git a/src/coreclr/debug/di/platformspecific.cpp b/src/coreclr/debug/di/platformspecific.cpp index 2dbdbd2a407056..cd690dccc2fd25 100644 --- a/src/coreclr/debug/di/platformspecific.cpp +++ b/src/coreclr/debug/di/platformspecific.cpp @@ -36,6 +36,9 @@ #elif TARGET_LOONGARCH64 #include "loongarch64/cordbregisterset.cpp" #include "loongarch64/primitives.cpp" +#elif TARGET_RISCV64 +#include "riscv64/cordbregisterset.cpp" +#include "riscv64/primitives.cpp" #else #error Unsupported platform #endif diff --git a/src/coreclr/debug/di/riscv64/cordbregisterset.cpp b/src/coreclr/debug/di/riscv64/cordbregisterset.cpp new file mode 100644 index 00000000000000..7a80fd91dee26f --- /dev/null +++ b/src/coreclr/debug/di/riscv64/cordbregisterset.cpp @@ -0,0 +1,116 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +//***************************************************************************** +// File: CordbRegisterSet.cpp +// + +// +//***************************************************************************** +#include "primitives.h" + + +HRESULT CordbRegisterSet::GetRegistersAvailable(ULONG64* pAvailable) +{ + FAIL_IF_NEUTERED(this); + VALIDATE_POINTER_TO_OBJECT(pAvailable, ULONG64 *); + + *pAvailable = SETBITULONG64(REGISTER_RISCV64_PC) + | SETBITULONG64(REGISTER_RISCV64_RA) + | SETBITULONG64(REGISTER_RISCV64_SP) + | SETBITULONG64(REGISTER_RISCV64_GP) + | SETBITULONG64(REGISTER_RISCV64_TP) + | SETBITULONG64(REGISTER_RISCV64_T0) + | SETBITULONG64(REGISTER_RISCV64_T1) + | SETBITULONG64(REGISTER_RISCV64_T2) + | SETBITULONG64(REGISTER_RISCV64_FP) + | SETBITULONG64(REGISTER_RISCV64_S1) + | SETBITULONG64(REGISTER_RISCV64_A0) + | SETBITULONG64(REGISTER_RISCV64_A1) + | SETBITULONG64(REGISTER_RISCV64_A2) + | SETBITULONG64(REGISTER_RISCV64_A3) + | SETBITULONG64(REGISTER_RISCV64_A4) + | SETBITULONG64(REGISTER_RISCV64_A5) + | SETBITULONG64(REGISTER_RISCV64_A6) + | SETBITULONG64(REGISTER_RISCV64_A7) + | SETBITULONG64(REGISTER_RISCV64_S2) + | SETBITULONG64(REGISTER_RISCV64_S3) + | SETBITULONG64(REGISTER_RISCV64_S4) + | SETBITULONG64(REGISTER_RISCV64_S5) + | SETBITULONG64(REGISTER_RISCV64_S6) + | SETBITULONG64(REGISTER_RISCV64_S7) + | SETBITULONG64(REGISTER_RISCV64_S8) + | SETBITULONG64(REGISTER_RISCV64_S9) + | SETBITULONG64(REGISTER_RISCV64_S10) + | SETBITULONG64(REGISTER_RISCV64_S11) + | SETBITULONG64(REGISTER_RISCV64_T3) + | SETBITULONG64(REGISTER_RISCV64_T4) + | SETBITULONG64(REGISTER_RISCV64_T5) + | SETBITULONG64(REGISTER_RISCV64_T6) + | SETBITULONG64(REGISTER_RISCV64_F0) + | SETBITULONG64(REGISTER_RISCV64_F1) + | SETBITULONG64(REGISTER_RISCV64_F2) + | SETBITULONG64(REGISTER_RISCV64_F3) + | SETBITULONG64(REGISTER_RISCV64_F4) + | SETBITULONG64(REGISTER_RISCV64_F5) + | SETBITULONG64(REGISTER_RISCV64_F6) + | SETBITULONG64(REGISTER_RISCV64_F7) + | SETBITULONG64(REGISTER_RISCV64_F8) + | SETBITULONG64(REGISTER_RISCV64_F9) + | SETBITULONG64(REGISTER_RISCV64_F10) + | SETBITULONG64(REGISTER_RISCV64_F11) + | SETBITULONG64(REGISTER_RISCV64_F12) + | SETBITULONG64(REGISTER_RISCV64_F13) + | SETBITULONG64(REGISTER_RISCV64_F14) + | SETBITULONG64(REGISTER_RISCV64_F15) + | SETBITULONG64(REGISTER_RISCV64_F16) + | SETBITULONG64(REGISTER_RISCV64_F17) + | SETBITULONG64(REGISTER_RISCV64_F18) + | SETBITULONG64(REGISTER_RISCV64_F19) + | SETBITULONG64(REGISTER_RISCV64_F20) + | SETBITULONG64(REGISTER_RISCV64_F21) + | SETBITULONG64(REGISTER_RISCV64_F22) + | SETBITULONG64(REGISTER_RISCV64_F23) + | SETBITULONG64(REGISTER_RISCV64_F24) + | SETBITULONG64(REGISTER_RISCV64_F25) + | SETBITULONG64(REGISTER_RISCV64_F26) + | SETBITULONG64(REGISTER_RISCV64_F27) + | SETBITULONG64(REGISTER_RISCV64_F28) + | SETBITULONG64(REGISTER_RISCV64_F29) + | SETBITULONG64(REGISTER_RISCV64_F30) + | SETBITULONG64(REGISTER_RISCV64_F31); + + return S_OK; +} + +HRESULT CordbRegisterSet::GetRegisters(ULONG64 mask, ULONG32 regCount, + CORDB_REGISTER regBuffer[]) +{ + _ASSERTE(!"RISCV64:NYI"); + return S_OK; +} + + +HRESULT CordbRegisterSet::GetRegistersAvailable(ULONG32 regCount, + BYTE pAvailable[]) +{ + _ASSERTE(!"RISCV64:NYI"); + return S_OK; +} + + +HRESULT CordbRegisterSet::GetRegisters(ULONG32 maskCount, BYTE mask[], + ULONG32 regCount, CORDB_REGISTER regBuffer[]) +{ + _ASSERTE(!"RISCV64:NYI"); + return S_OK; +} + + +// This is just a convenience function to convert a regdisplay into a Context. 
+// Since a context has more info than a regdisplay, the conversion isn't perfect +// and the context can't be fully accurate. +void CordbRegisterSet::InternalCopyRDToContext(DT_CONTEXT *pInputContext) +{ + _ASSERTE(!"RISCV64:NYI"); +} diff --git a/src/coreclr/debug/di/riscv64/floatconversion.S b/src/coreclr/debug/di/riscv64/floatconversion.S new file mode 100644 index 00000000000000..9270623e2cfc79 --- /dev/null +++ b/src/coreclr/debug/di/riscv64/floatconversion.S @@ -0,0 +1,12 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +// Arguments +// input: (in A0) the value to be converted to a double +// output: the double corresponding to the _NEON128 input value +LEAF_ENTRY FPFillR8, .TEXT + ld a0, 0(a0) + jalr x0, ra, 0 +LEAF_END FPFillR8, .TEXT diff --git a/src/coreclr/pal/src/arch/riscv64/activationhandlerwrapper.S b/src/coreclr/debug/di/riscv64/primitives.cpp similarity index 57% rename from src/coreclr/pal/src/arch/riscv64/activationhandlerwrapper.S rename to src/coreclr/debug/di/riscv64/primitives.cpp index a7cd5b6c4d2403..97b053a8d3e9c6 100644 --- a/src/coreclr/pal/src/arch/riscv64/activationhandlerwrapper.S +++ b/src/coreclr/debug/di/riscv64/primitives.cpp @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#include "unixasmmacros.inc" -#include "asmconstants.h" +// -#error "TODO-RISCV64: missing implementation" +#include "../../shared/riscv64/primitives.cpp" diff --git a/src/coreclr/debug/di/rsthread.cpp b/src/coreclr/debug/di/rsthread.cpp index 075ed936fed530..5a2ea695ebc84c 100644 --- a/src/coreclr/debug/di/rsthread.cpp +++ b/src/coreclr/debug/di/rsthread.cpp @@ -8331,6 +8331,9 @@ HRESULT CordbJITILFrame::GetNativeVariable(CordbType *type, #elif defined(TARGET_LOONGARCH64) hr = m_nativeFrame->GetLocalFloatingPointValue(pNativeVarInfo->loc.vlReg.vlrReg + REGISTER_LOONGARCH64_F0, type, ppValue); +#elif defined(TARGET_RISCV64) + hr = m_nativeFrame->GetLocalFloatingPointValue(pNativeVarInfo->loc.vlReg.vlrReg + REGISTER_RISCV64_F0, + type, ppValue); #else #error Platform not implemented #endif // TARGET_ARM @ARMTODO @@ -8769,6 +8772,8 @@ HRESULT CordbJITILFrame::GetReturnValueForType(CordbType *pType, ICorDebugValue const CorDebugRegister floatRegister = REGISTER_ARM_D0; #elif defined(TARGET_LOONGARCH64) const CorDebugRegister floatRegister = REGISTER_LOONGARCH64_F0; +#elif defined(TARGET_RISCV64) + const CorDebugRegister floatRegister = REGISTER_RISCV64_F0; #endif #if defined(TARGET_X86) @@ -8783,6 +8788,8 @@ HRESULT CordbJITILFrame::GetReturnValueForType(CordbType *pType, ICorDebugValue const CorDebugRegister ptrHighWordRegister = REGISTER_ARM_R1; #elif defined(TARGET_LOONGARCH64) const CorDebugRegister ptrRegister = REGISTER_LOONGARCH64_A0; +#elif defined(TARGET_RISCV64) + const CorDebugRegister ptrRegister = REGISTER_RISCV64_A0; #endif CorElementType corReturnType = pType->GetElementType(); diff --git a/src/coreclr/debug/di/shimremotedatatarget.cpp b/src/coreclr/debug/di/shimremotedatatarget.cpp index 1a5fb8562a6be3..674d325560ff52 100644 --- a/src/coreclr/debug/di/shimremotedatatarget.cpp +++ b/src/coreclr/debug/di/shimremotedatatarget.cpp @@ -232,6 +232,8 @@ ShimRemoteDataTarget::GetPlatform( *pPlatform = CORDB_PLATFORM_POSIX_ARM64; #elif defined(TARGET_LOONGARCH64) *pPlatform = CORDB_PLATFORM_POSIX_LOONGARCH64; + #elif defined(TARGET_RISCV64) + *pPlatform = 
CORDB_PLATFORM_POSIX_RISCV64; #else #error Unknown Processor. #endif diff --git a/src/coreclr/debug/ee/riscv64/dbghelpers.S b/src/coreclr/debug/ee/riscv64/dbghelpers.S index 3515f38c8120d7..71b584d7aa6566 100644 --- a/src/coreclr/debug/ee/riscv64/dbghelpers.S +++ b/src/coreclr/debug/ee/riscv64/dbghelpers.S @@ -4,4 +4,40 @@ #include "asmconstants.h" #include "unixasmmacros.inc" -#error "TODO-RISCV64: missing implementation" +// +// hijacking stub used to perform a func-eval, see Debugger::FuncEvalSetup() for use. +// +// on entry: +// a0 : pointer to DebuggerEval object +// + +// @dbgtodo- once we port Funceval, use the ExceptionHijack stub instead of this func-eval stub. +NESTED_ENTRY FuncEvalHijack, _TEXT, UnhandledExceptionHandlerUnix + // NOTE: FuncEvalHijackPersonalityRoutine is dependent on the stack layout so if + // you change the prolog you will also need to update the personality routine. + + // push arg to the stack so our personality routine can find it + // push lr to get good stacktrace in debugger + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a0, 16(sp) + + // FuncEvalHijackWorker returns the address we should jump to. + call C_FUNC(FuncEvalHijackWorker) + + EPILOG_STACK_FREE 32 + EPILOG_BRANCH_REG a0 +NESTED_END FuncEvalHijack + +// This is the general purpose hijacking stub. The DacDbi Hijack primitive will +// set up the stack and then set the IP here, and so this just makes the call. +NESTED_ENTRY ExceptionHijack, _TEXT, UnhandledExceptionHandlerUnix + // make the call + call C_FUNC(ExceptionHijackWorker) + + // effective NOP to terminate unwind ??? + nop + + // *** should never get here *** + EMIT_BREAKPOINT +// exported label so the debugger knows where the end of this function is +NESTED_END ExceptionHijack, _TEXT diff --git a/src/coreclr/debug/ee/riscv64/primitives.cpp b/src/coreclr/debug/ee/riscv64/primitives.cpp index c4b50b4c66ef08..60bf9806f27b16 100644 --- a/src/coreclr/debug/ee/riscv64/primitives.cpp +++ b/src/coreclr/debug/ee/riscv64/primitives.cpp @@ -7,4 +7,9 @@ #include "threads.h" #include "../../shared/riscv64/primitives.cpp" -#error "TODO-RISCV64: missing implementation" +// #error "TODO-RISCV64: missing implementation" +void CopyREGDISPLAY(REGDISPLAY* pDst, REGDISPLAY* pSrc) +{ + CONTEXT tmp; + CopyRegDisplay(pSrc, pDst, &tmp); +} diff --git a/src/coreclr/debug/ee/riscv64/walker.cpp b/src/coreclr/debug/ee/riscv64/walker.cpp index c428cd8f3dbd23..4bf789d5f5c6ce 100644 --- a/src/coreclr/debug/ee/riscv64/walker.cpp +++ b/src/coreclr/debug/ee/riscv64/walker.cpp @@ -14,6 +14,6 @@ #ifdef TARGET_RISCV64 -#error "TODO-RISCV64: missing implementation" +// #error "TODO-RISCV64: missing implementation" #endif diff --git a/src/coreclr/debug/inc/dbgipcevents.h b/src/coreclr/debug/inc/dbgipcevents.h index f40eae3c6a4276..76b228b6ebf1bc 100644 --- a/src/coreclr/debug/inc/dbgipcevents.h +++ b/src/coreclr/debug/inc/dbgipcevents.h @@ -1895,6 +1895,13 @@ C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP); C_ASSERT(DBG_TARGET_REGNUM_SP == ICorDebugInfo::REGNUM_SP); C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP); #endif +#elif defined(TARGET_RISCV64) +#define DBG_TARGET_REGNUM_SP 2 +#define DBG_TARGET_REGNUM_AMBIENT_SP 34 +#ifdef TARGET_RISCV64 +C_ASSERT(DBG_TARGET_REGNUM_SP == ICorDebugInfo::REGNUM_SP); +C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP); +#endif #else #error Target registers are not defined for this platform #endif diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h 
b/src/coreclr/debug/inc/dbgtargetcontext.h index 403039e24d6a56..dc3b853014dee2 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -50,6 +50,8 @@ #define DTCONTEXT_IS_ARM64 #elif defined (TARGET_LOONGARCH64) #define DTCONTEXT_IS_LOONGARCH64 +#elif defined (TARGET_RISCV64) +#define DTCONTEXT_IS_RISCV64 #endif #if defined(DTCONTEXT_IS_X86) @@ -293,6 +295,7 @@ typedef struct DECLSPEC_ALIGN(16) { #define DT_ARM_MAX_BREAKPOINTS 8 #define DT_ARM_MAX_WATCHPOINTS 1 + typedef struct { ULONGLONG Low; LONGLONG High; @@ -513,6 +516,71 @@ typedef DECLSPEC_ALIGN(16) struct { ULONGLONG F[32]; } DT_CONTEXT; +#elif defined(DTCONTEXT_IS_RISCV64) +#define DT_CONTEXT_RISCV64 0x01000000L + +#define DT_CONTEXT_CONTROL (DT_CONTEXT_RISCV64 | 0x1L) +#define DT_CONTEXT_INTEGER (DT_CONTEXT_RISCV64 | 0x2L) +#define DT_CONTEXT_FLOATING_POINT (DT_CONTEXT_RISCV64 | 0x4L) +#define DT_CONTEXT_DEBUG_REGISTERS (DT_CONTEXT_RISCV64 | 0x8L) + +#define DT_CONTEXT_FULL (DT_CONTEXT_CONTROL | DT_CONTEXT_INTEGER | DT_CONTEXT_FLOATING_POINT) +#define DT_CONTEXT_ALL (DT_CONTEXT_CONTROL | DT_CONTEXT_INTEGER | DT_CONTEXT_FLOATING_POINT | DT_CONTEXT_DEBUG_REGISTERS) + +#define DT_RISCV64_MAX_BREAKPOINTS 8 +#define DT_RISCV64_MAX_WATCHPOINTS 2 + +typedef DECLSPEC_ALIGN(16) struct { + // + // Control flags. + // + + /* +0x000 */ DWORD ContextFlags; + + // + // Integer registers + // + DWORD64 ZR; + DWORD64 RA; + DWORD64 SP; + DWORD64 GP; + DWORD64 TP; + DWORD64 T0; + DWORD64 T1; + DWORD64 T2; + DWORD64 FP; + DWORD64 S1; + DWORD64 A0; + DWORD64 A1; + DWORD64 A2; + DWORD64 A3; + DWORD64 A4; + DWORD64 A5; + DWORD64 A6; + DWORD64 A7; + DWORD64 S2; + DWORD64 S3; + DWORD64 S4; + DWORD64 S5; + DWORD64 S6; + DWORD64 S7; + DWORD64 S8; + DWORD64 S9; + DWORD64 S10; + DWORD64 S11; + DWORD64 T3; + DWORD64 T4; + DWORD64 T5; + DWORD64 T6; + DWORD64 PC; + + // + // Floating Point Registers + // + ULONGLONG F[32]; +} DT_CONTEXT; + + #else #error Unsupported platform #endif diff --git a/src/coreclr/debug/inc/riscv64/primitives.h b/src/coreclr/debug/inc/riscv64/primitives.h new file mode 100644 index 00000000000000..99a1e28ce801c8 --- /dev/null +++ b/src/coreclr/debug/inc/riscv64/primitives.h @@ -0,0 +1,259 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +//***************************************************************************** +// File: primitives.h +// + +// +// Platform-specific debugger primitives +// +//***************************************************************************** + +#ifndef PRIMITIVES_H_ +#define PRIMITIVES_H_ + +typedef const BYTE CORDB_ADDRESS_TYPE; +typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; + +#define MAX_INSTRUCTION_LENGTH 4 + +// Given a return address retrieved during stackwalk, +// this is the offset by which it should be decremented to land at the call instruction. 
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 4 + +#define PRD_TYPE LONG +#define CORDbg_BREAK_INSTRUCTION_SIZE 4 +#define CORDbg_BREAK_INSTRUCTION (LONG)0x00100073 + +inline CORDB_ADDRESS GetPatchEndAddr(CORDB_ADDRESS patchAddr) +{ + LIMITED_METHOD_DAC_CONTRACT; + return patchAddr + CORDbg_BREAK_INSTRUCTION_SIZE; +} + +#define InitializePRDToBreakInst(_pPRD) *(_pPRD) = CORDbg_BREAK_INSTRUCTION +#define PRDIsBreakInst(_pPRD) (*(_pPRD) == CORDbg_BREAK_INSTRUCTION) + + +#define CORDbgGetInstructionEx(_buffer, _requestedAddr, _patchAddr, _dummy1, _dummy2) \ + CORDbgGetInstructionExImpl((CORDB_ADDRESS_TYPE *)((_buffer) + (_patchAddr) - (_requestedAddr))); + +#define CORDbgSetInstructionEx(_buffer, _requestedAddr, _patchAddr, _opcode, _dummy2) \ + CORDbgSetInstructionExImpl((CORDB_ADDRESS_TYPE *)((_buffer) + (_patchAddr) - (_requestedAddr)), (_opcode)); + +#define CORDbgInsertBreakpointEx(_buffer, _requestedAddr, _patchAddr, _dummy1, _dummy2) \ + CORDbgInsertBreakpointExImpl((CORDB_ADDRESS_TYPE *)((_buffer) + (_patchAddr) - (_requestedAddr))); + + +constexpr CorDebugRegister g_JITToCorDbgReg[] = +{ + REGISTER_RISCV64_RA, + REGISTER_RISCV64_SP, + REGISTER_RISCV64_GP, + REGISTER_RISCV64_TP, + REGISTER_RISCV64_T0, + REGISTER_RISCV64_T1, + REGISTER_RISCV64_T2, + REGISTER_RISCV64_FP, + REGISTER_RISCV64_S1, + REGISTER_RISCV64_A0, + REGISTER_RISCV64_A1, + REGISTER_RISCV64_A2, + REGISTER_RISCV64_A3, + REGISTER_RISCV64_A4, + REGISTER_RISCV64_A5, + REGISTER_RISCV64_A6, + REGISTER_RISCV64_A7, + REGISTER_RISCV64_S2, + REGISTER_RISCV64_S3, + REGISTER_RISCV64_S4, + REGISTER_RISCV64_S5, + REGISTER_RISCV64_S6, + REGISTER_RISCV64_S7, + REGISTER_RISCV64_S8, + REGISTER_RISCV64_S9, + REGISTER_RISCV64_S10, + REGISTER_RISCV64_S11, + REGISTER_RISCV64_T3, + REGISTER_RISCV64_T4, + REGISTER_RISCV64_T5, + REGISTER_RISCV64_T6, + REGISTER_RISCV64_PC +}; + +inline void CORDbgSetIP(DT_CONTEXT *context, LPVOID ip) { + LIMITED_METHOD_CONTRACT; + + context->PC = (DWORD64)ip; +} + +inline LPVOID CORDbgGetSP(const DT_CONTEXT * context) { + LIMITED_METHOD_CONTRACT; + + return (LPVOID)(size_t)(context->SP); +} + +inline void CORDbgSetSP(DT_CONTEXT *context, LPVOID esp) { + LIMITED_METHOD_CONTRACT; + + context->SP = (DWORD64)esp; +} + +inline LPVOID CORDbgGetFP(const DT_CONTEXT * context) { + LIMITED_METHOD_CONTRACT; + + return (LPVOID)(size_t)(context->FP); +} + +inline void CORDbgSetFP(DT_CONTEXT *context, LPVOID fp) { + LIMITED_METHOD_CONTRACT; + + context->FP = (DWORD64)fp; +} + + +inline BOOL CompareControlRegisters(const DT_CONTEXT * pCtx1, const DT_CONTEXT * pCtx2) +{ + LIMITED_METHOD_DAC_CONTRACT; + + // TODO-LoongArch64: Sort out frame registers + + if ((pCtx1->PC == pCtx2->PC) && + (pCtx1->SP == pCtx2->SP) && + (pCtx1->FP == pCtx2->FP)) + { + return TRUE; + } + + return FALSE; +} + +inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, + PRD_TYPE instruction) +{ + // In a DAC build, this function assumes the input is an host address. + LIMITED_METHOD_DAC_CONTRACT; + + ULONGLONG ptraddr = dac_cast(address); + *(PRD_TYPE *)ptraddr = instruction; + FlushInstructionCache(GetCurrentProcess(), + address, + sizeof(PRD_TYPE)); +} + +inline PRD_TYPE CORDbgGetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address) +{ + LIMITED_METHOD_CONTRACT; + + ULONGLONG ptraddr = dac_cast(address); + return *(PRD_TYPE *)ptraddr; +} + +// +// Mapping from ICorDebugInfo register numbers to CorDebugRegister +// numbers. Note: this must match the order in corinfo.h. 
+//
+inline CorDebugRegister ConvertRegNumToCorDebugRegister(ICorDebugInfo::RegNum reg)
+{
+    LIMITED_METHOD_CONTRACT;
+    _ASSERTE(reg >= 0);
+    _ASSERTE(static_cast<size_t>(reg) < ARRAY_SIZE(g_JITToCorDbgReg));
+    return g_JITToCorDbgReg[reg];
+}
+
+inline LPVOID CORDbgGetIP(DT_CONTEXT *context)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    return (LPVOID)(size_t)(context->PC);
+}
+
+inline void CORDbgSetInstructionExImpl(CORDB_ADDRESS_TYPE* address,
+                                       PRD_TYPE instruction)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    *(PRD_TYPE *)address = instruction;
+    FlushInstructionCache(GetCurrentProcess(),
+                          address,
+                          sizeof(PRD_TYPE));
+}
+
+inline PRD_TYPE CORDbgGetInstructionExImpl(UNALIGNED CORDB_ADDRESS_TYPE* address)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    return *(PRD_TYPE *)address;
+}
+
+inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    CORDbgSetInstruction(address, CORDbg_BREAK_INSTRUCTION);
+}
+
+inline void CORDbgInsertBreakpointExImpl(UNALIGNED CORDB_ADDRESS_TYPE *address)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    CORDbgSetInstruction(address, CORDbg_BREAK_INSTRUCTION);
+}
+
+// After a breakpoint exception, the CPU points to _after_ the break instruction.
+// Adjust the IP so that it points at the break instruction. This lets us patch that
+// opcode and re-execute what was underneath the bp.
+inline void CORDbgAdjustPCForBreakInstruction(DT_CONTEXT* pContext)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // RISCV64 appears to leave the PC at the start of the breakpoint.
+    return;
+}
+
+inline bool AddressIsBreakpoint(CORDB_ADDRESS_TYPE* address)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    return CORDbgGetInstruction(address) == CORDbg_BREAK_INSTRUCTION;
+}
+
+inline void SetSSFlag(DT_CONTEXT *pContext)
+{
+    // TODO-RISCV64: RISCV64 doesn't support cpsr.
+    _ASSERTE(!"unimplemented on RISCV64 yet");
+}
+
+inline void UnsetSSFlag(DT_CONTEXT *pContext)
+{
+    // TODO-RISCV64: RISCV64 doesn't support cpsr.
+    _ASSERTE(!"unimplemented on RISCV64 yet");
+}
+
+inline bool IsSSFlagEnabled(DT_CONTEXT * pContext)
+{
+    // TODO-RISCV64: RISCV64 doesn't support cpsr.
+    _ASSERTE(!"unimplemented on RISCV64 yet");
+    return false;
+}
+
+
+inline bool PRDIsEqual(PRD_TYPE p1, PRD_TYPE p2)
+{
+    return p1 == p2;
+}
+
+inline void InitializePRD(PRD_TYPE *p1)
+{
+    *p1 = 0;
+}
+
+inline bool PRDIsEmpty(PRD_TYPE p1)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    return p1 == 0;
+}
+
+#endif // PRIMITIVES_H_
diff --git a/src/coreclr/debug/shared/riscv64/primitives.cpp b/src/coreclr/debug/shared/riscv64/primitives.cpp
index 50eae7be8a7ea8..1a65ce043deb34 100644
--- a/src/coreclr/debug/shared/riscv64/primitives.cpp
+++ b/src/coreclr/debug/shared/riscv64/primitives.cpp
@@ -12,4 +12,19 @@
 #include "primitives.h"
-#error "TODO-RISCV64: missing implementation"
+// #error "TODO-RISCV64: missing implementation"
+
+// CopyThreadContext() does an intelligent copy from pSrc to pDst,
+// respecting the ContextFlags of both contexts.
+// +void CORDbgCopyThreadContext(DT_CONTEXT* pDst, const DT_CONTEXT* pSrc) +{ + _ASSERTE(!"RISCV64:NYI"); +} + +#if defined(ALLOW_VMPTR_ACCESS) || !defined(RIGHT_SIDE_COMPILE) +void SetDebuggerREGDISPLAYFromREGDISPLAY(DebuggerREGDISPLAY* pDRD, REGDISPLAY* pRD) +{ + _ASSERTE(!"RISCV64:NYI"); +} +#endif // ALLOW_VMPTR_ACCESS || !RIGHT_SIDE_COMPILE diff --git a/src/coreclr/dlls/mscordac/CMakeLists.txt b/src/coreclr/dlls/mscordac/CMakeLists.txt index 25c2532358774b..ee29ceb80b2d5b 100644 --- a/src/coreclr/dlls/mscordac/CMakeLists.txt +++ b/src/coreclr/dlls/mscordac/CMakeLists.txt @@ -49,6 +49,8 @@ else(CLR_CMAKE_HOST_WIN32) if (CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_LOONGARCH64) set(JUMP_INSTRUCTION b) + elseif (CLR_CMAKE_HOST_ARCH_RISCV64) + set(JUMP_INSTRUCTION tail) else() set(JUMP_INSTRUCTION jmp) endif() diff --git a/src/coreclr/gc/env/gcenv.base.h b/src/coreclr/gc/env/gcenv.base.h index c6a73eb6afea5b..d6f6e6161b2543 100644 --- a/src/coreclr/gc/env/gcenv.base.h +++ b/src/coreclr/gc/env/gcenv.base.h @@ -226,6 +226,11 @@ typedef DWORD (WINAPI *PTHREAD_START_ROUTINE)(void* lpThreadParameter); #define MemoryBarrier __sync_synchronize #endif // __loongarch64 +#ifdef __riscv // TODO RISCV64 + #define YieldProcessor() asm volatile( "fence iorw, iorw"); // TODO + #define MemoryBarrier __sync_synchronize +#endif // __riscv + #endif // _MSC_VER #ifdef _MSC_VER diff --git a/src/coreclr/gc/env/volatile.h b/src/coreclr/gc/env/volatile.h index bef01f680e5465..c40e5c2d60964e 100644 --- a/src/coreclr/gc/env/volatile.h +++ b/src/coreclr/gc/env/volatile.h @@ -66,8 +66,8 @@ #error The Volatile type is currently only defined for Visual C++ and GNU C++ #endif -#if defined(__GNUC__) && !defined(HOST_X86) && !defined(HOST_AMD64) && !defined(HOST_ARM) && !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_WASM) -#error The Volatile type is currently only defined for GCC when targeting x86, AMD64, ARM, ARM64, LOONGARCH64 or Wasm +#if defined(__GNUC__) && !defined(HOST_X86) && !defined(HOST_AMD64) && !defined(HOST_ARM) && !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_WASM) && !defined(HOST_RISCV64) +#error The Volatile type is currently only defined for GCC when targeting x86, AMD64, ARM, ARM64, LOONGARCH64, Wasm, RISCV64 #endif #if defined(__GNUC__) @@ -76,6 +76,8 @@ #define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb ish" : : : "memory") #elif defined(HOST_LOONGARCH64) #define VOLATILE_MEMORY_BARRIER() asm volatile ("dbar 0 " : : : "memory") +#elif defined(HOST_RISCV64) +#define VOLATILE_MEMORY_BARRIER() asm volatile ("fence rw,rw" : : : "memory") #else // // For GCC, we prevent reordering by the compiler by inserting the following after a volatile diff --git a/src/coreclr/gcdump/gcdumpnonx86.cpp b/src/coreclr/gcdump/gcdumpnonx86.cpp index f93e9bd42ca3ac..336dede0d608b3 100644 --- a/src/coreclr/gcdump/gcdumpnonx86.cpp +++ b/src/coreclr/gcdump/gcdumpnonx86.cpp @@ -72,6 +72,9 @@ PCSTR GetRegName (UINT32 regnum) #elif defined(TARGET_LOONGARCH64) assert(!"unimplemented on LOONGARCH yet"); return "???"; +#elif defined(TARGET_RISCV64) + assert(!"unimplemented on RISCV64 yet"); + return "???"; #endif } diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index a9d8a6f5848d0f..f1dcd7fc89d3a8 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -79,8 +79,12 @@ if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) endif 
(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) -create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) -create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) +if (CLR_CMAKE_TARGET_ARCH_RISCV64) + create_gcinfo_lib(TARGET gcinfo_unix_riscv64 OS unix ARCH riscv64) +else() + create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) + create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) +endif (CLR_CMAKE_TARGET_ARCH_RISCV64) if (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX) create_gcinfo_lib(TARGET gcinfo_unix_x86 OS unix ARCH x86) diff --git a/src/coreclr/gcinfo/gcinfodumper.cpp b/src/coreclr/gcinfo/gcinfodumper.cpp index ba70d9c68c8578..be45bc3feb2ee2 100644 --- a/src/coreclr/gcinfo/gcinfodumper.cpp +++ b/src/coreclr/gcinfo/gcinfodumper.cpp @@ -224,6 +224,45 @@ BOOL GcInfoDumper::ReportPointerRecord ( REG(ra, Ra), { offsetof(T_CONTEXT, Sp) }, #undef REG +#elif defined(TARGET_RISCV64) +#undef REG +#define REG(reg, field) { offsetof(Riscv64VolatileContextPointer, field) } + REG(zero, R0), + REG(a0, A0), + REG(a1, A1), + REG(a2, A2), + REG(a3, A3), + REG(a4, A4), + REG(a5, A5), + REG(a6, A6), + REG(a7, A7), + REG(t0, T0), + REG(t1, T1), + REG(t2, T2), + REG(t3, T3), + REG(t4, T4), + REG(t5, T5), + REG(t6, T6), +#undef REG +#define REG(reg, field) { offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, field) } + REG(s1, S1), + REG(s2, S2), + REG(s3, S3), + REG(s4, S4), + REG(s5, S5), + REG(s6, S6), + REG(s7, S7), + REG(s8, S8), + REG(s9, S9), + REG(s10, S10), + REG(s11, S11), + REG(ra, Ra), + REG(gp, Gp), + REG(tp, Tp), + REG(fp, Fp), + { offsetof(T_CONTEXT, Sp) }, +#undef REG + #else PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this platform.") #endif @@ -248,6 +287,9 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this #elif defined(TARGET_LOONGARCH64) assert(!"unimplemented on LOONGARCH yet"); iSPRegister = 0; +#elif defined(TARGET_RISCV64) + assert(!"unimplemented on RISCV64 yet"); + iSPRegister = 0; #endif #if defined(TARGET_ARM) || defined(TARGET_ARM64) @@ -660,8 +702,11 @@ GcInfoDumper::EnumerateStateChangesResults GcInfoDumper::EnumerateStateChanges ( #elif defined(TARGET_LOONGARCH64) #pragma message("Unimplemented for LOONGARCH64 yet.") assert(!"unimplemented on LOONGARCH yet"); +#elif defined(TARGET_RISCV64) +#pragma message("Unimplemented for RISCV64 yet.") + assert(!"unimplemented on RISCV64 yet"); // TODO RISCV64 #else -PORTABILITY_ASSERT("GcInfoDumper::EnumerateStateChanges is not implemented on this platform.") +PORTABILITY_ASSERT("GcInfoDumper::EnumerateStateChanges is not implemented on this platform."); #endif #undef FILL_REGS diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index 9564e3622ee6fd..73d0e29ca5f40d 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -479,7 +479,7 @@ GcInfoEncoder::GcInfoEncoder( m_ReversePInvokeFrameSlot = NO_REVERSE_PINVOKE_FRAME; #ifdef TARGET_AMD64 m_WantsReportOnlyLeaf = false; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) m_HasTailCalls = false; #endif // TARGET_AMD64 m_IsVarArg = false; @@ -729,6 +729,8 @@ void GcInfoEncoder::SetStackBaseRegister( UINT32 regNum ) _ASSERTE( m_StackBaseRegister == NO_STACK_BASE_REGISTER || m_StackBaseRegister == regNum ); #if defined(TARGET_LOONGARCH64) assert(regNum == 3 || 22 == regNum); +#elif 
defined(TARGET_RISCV64) + assert(regNum == 2 || 8 == regNum); #endif m_StackBaseRegister = regNum; } @@ -752,7 +754,7 @@ void GcInfoEncoder::SetWantsReportOnlyLeaf() { m_WantsReportOnlyLeaf = true; } -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void GcInfoEncoder::SetHasTailCalls() { m_HasTailCalls = true; @@ -1011,7 +1013,7 @@ void GcInfoEncoder::Build() (m_SizeOfEditAndContinuePreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA) && #ifdef TARGET_AMD64 !m_WantsReportOnlyLeaf && -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) !m_HasTailCalls && #endif // TARGET_AMD64 !IsStructReturnKind(m_ReturnKind); @@ -1024,6 +1026,8 @@ void GcInfoEncoder::Build() GCINFO_WRITE(m_Info1, 0, 1, FlagsSize); // Slim encoding #if defined(TARGET_LOONGARCH64) assert(m_StackBaseRegister == 22 || 3 == m_StackBaseRegister); +#elif defined(TARGET_RISCV64) + assert(m_StackBaseRegister == 8 || 2 == m_StackBaseRegister); #endif GCINFO_WRITE(m_Info1, (m_StackBaseRegister == NO_STACK_BASE_REGISTER) ? 0 : 1, 1, FlagsSize); @@ -1039,11 +1043,13 @@ void GcInfoEncoder::Build() GCINFO_WRITE(m_Info1, m_contextParamType, 2, FlagsSize); #if defined(TARGET_LOONGARCH64) assert(m_StackBaseRegister == 22 || 3 == m_StackBaseRegister); +#elif defined(TARGET_RISCV64) + assert(m_StackBaseRegister == 8 || 2 == m_StackBaseRegister); #endif GCINFO_WRITE(m_Info1, ((m_StackBaseRegister != NO_STACK_BASE_REGISTER) ? 1 : 0), 1, FlagsSize); #ifdef TARGET_AMD64 GCINFO_WRITE(m_Info1, (m_WantsReportOnlyLeaf ? 1 : 0), 1, FlagsSize); -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) GCINFO_WRITE(m_Info1, (m_HasTailCalls ? 1 : 0), 1, FlagsSize); #endif // TARGET_AMD64 GCINFO_WRITE(m_Info1, ((m_SizeOfEditAndContinuePreservedArea != NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA) ? 
1 : 0), 1, FlagsSize); @@ -1129,6 +1135,8 @@ void GcInfoEncoder::Build() { #if defined(TARGET_LOONGARCH64) assert(m_StackBaseRegister == 22 || 3 == m_StackBaseRegister); +#elif defined(TARGET_RISCV64) + assert(m_StackBaseRegister == 8 || 2 == m_StackBaseRegister); #endif GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister), STACK_BASE_REGISTER_ENCBASE, StackBaseSize); } diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 7fe6e3ff01eec5..982f704f5732ca 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -293,7 +293,12 @@ CONFIG_DWORD_INFO(INTERNAL_JitDebuggable, W("JitDebuggable"), 0, "") #define INTERNAL_JitEnableNoWayAssert_Default 1 #endif RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitEnableNoWayAssert, W("JitEnableNoWayAssert"), INTERNAL_JitEnableNoWayAssert_Default, "") + +#if defined(TARGET_RISCV64) +RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_JitFramed, W("JitFramed"), 1, "Forces EBP frames") +#else RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_JitFramed, W("JitFramed"), 0, "Forces EBP frames") +#endif // TARGET_RISCV64 CONFIG_DWORD_INFO(INTERNAL_JitThrowOnAssertionFailure, W("JitThrowOnAssertionFailure"), 0, "Throw managed exception on assertion failures during JIT instead of failfast") CONFIG_DWORD_INFO(INTERNAL_JitGCStress, W("JitGCStress"), 0, "GC stress mode for jit") CONFIG_DWORD_INFO(INTERNAL_JitHeartbeat, W("JitHeartbeat"), 0, "") @@ -736,12 +741,12 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" // // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h // -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //TODO: should implement LoongArch64's features. RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") #else RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(TARGET_AMD64) || defined(TARGET_X86) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h index 0bba92c7e3a2f9..3da9a6abf56787 100644 --- a/src/coreclr/inc/clrnt.h +++ b/src/coreclr/inc/clrnt.h @@ -1084,4 +1084,62 @@ RtlVirtualUnwind( #endif // TARGET_LOONGARCH64 +#ifdef TARGET_RISCV64 +#include "daccess.h" + +#define UNW_FLAG_NHANDLER 0x0 /* any handler */ +#define UNW_FLAG_EHANDLER 0x1 /* filter handler */ +#define UNW_FLAG_UHANDLER 0x2 /* unwind handler */ + +// This function returns the RVA of the end of the function (exclusive, so one byte after the actual end) +// using the unwind info on ARM64. 
(see ExternalAPIs\Win9CoreSystem\inc\winnt.h) +FORCEINLINE +ULONG64 +RtlpGetFunctionEndAddress ( + _In_ PT_RUNTIME_FUNCTION FunctionEntry, + _In_ ULONG64 ImageBase + ) +{ + // TODO RISCV64 + ULONG64 FunctionLength; + + FunctionLength = FunctionEntry->UnwindData; + if ((FunctionLength & 3) != 0) { + FunctionLength = (FunctionLength >> 2) & 0x7ff; + } else { + memcpy(&FunctionLength, (void*)(ImageBase + FunctionLength), sizeof(UINT32)); + FunctionLength &= 0x3ffff; + } + + return FunctionEntry->BeginAddress + 4 * FunctionLength; +} + +#define RUNTIME_FUNCTION__BeginAddress(FunctionEntry) ((FunctionEntry)->BeginAddress) +#define RUNTIME_FUNCTION__SetBeginAddress(FunctionEntry,address) ((FunctionEntry)->BeginAddress = (address)) + +#define RUNTIME_FUNCTION__EndAddress(FunctionEntry, ImageBase) (RtlpGetFunctionEndAddress(FunctionEntry, (ULONG64)(ImageBase))) + +#define RUNTIME_FUNCTION__SetUnwindInfoAddress(prf,address) do { (prf)->UnwindData = (address); } while (0) + +typedef struct _UNWIND_INFO { + // dummy +} UNWIND_INFO, *PUNWIND_INFO; + +EXTERN_C +NTSYSAPI +PEXCEPTION_ROUTINE +NTAPI +RtlVirtualUnwind( + IN ULONG HandlerType, + IN ULONG64 ImageBase, + IN ULONG64 ControlPc, + IN PRUNTIME_FUNCTION FunctionEntry, + IN OUT PCONTEXT ContextRecord, + OUT PVOID *HandlerData, + OUT PULONG64 EstablisherFrame, + IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL + ); + +#endif // TARGET_RISCV64 + #endif // CLRNT_H_ diff --git a/src/coreclr/inc/cordebuginfo.h b/src/coreclr/inc/cordebuginfo.h index 35e0eca6fd7894..01780570570e48 100644 --- a/src/coreclr/inc/cordebuginfo.h +++ b/src/coreclr/inc/cordebuginfo.h @@ -179,6 +179,40 @@ class ICorDebugInfo REGNUM_S7, REGNUM_S8, REGNUM_PC, +#elif TARGET_RISCV64 + REGNUM_R0, + REGNUM_RA, + REGNUM_SP, + REGNUM_GP, + REGNUM_TP, + REGNUM_T0, + REGNUM_T1, + REGNUM_T2, + REGNUM_S0, + REGNUM_S1, + REGNUM_A0, + REGNUM_A1, + REGNUM_A2, + REGNUM_A3, + REGNUM_A4, + REGNUM_A5, + REGNUM_A6, + REGNUM_A7, + REGNUM_S2, + REGNUM_S3, + REGNUM_S4, + REGNUM_S5, + REGNUM_S6, + REGNUM_S7, + REGNUM_S8, + REGNUM_S9, + REGNUM_S10, + REGNUM_S11, + REGNUM_T3, + REGNUM_T4, + REGNUM_T5, + REGNUM_T6, + REGNUM_PC, #else PORTABILITY_WARNING("Register numbers not defined on this platform") #endif @@ -201,6 +235,8 @@ class ICorDebugInfo //Nothing to do here. FP is already alloted. #elif TARGET_LOONGARCH64 //Nothing to do here. FP is already alloted. +#elif TARGET_RISCV64 + //Nothing to do here. FP is already alloted. #else // RegNum values should be properly defined for this platform REGNUM_FP = 0, diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index 7353e8cc6b770d..12adb32cc409db 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -517,6 +517,148 @@ typedef struct _T_KNONVOLATILE_CONTEXT_POINTERS { PDWORD64 F31; } T_KNONVOLATILE_CONTEXT_POINTERS, *PT_KNONVOLATILE_CONTEXT_POINTERS; +#elif defined(HOST_AMD64) && defined(TARGET_RISCV64) // Host amd64 managing RISCV64 related code + +#ifndef CROSS_COMPILE +#define CROSS_COMPILE +#endif + +// +// Specify the number of breakpoints and watchpoints that the OS +// will track. Architecturally, LOONGARCH64 supports up to 16. In practice, +// however, almost no one implements more than 4 of each. +// + +#define RISCV64_MAX_BREAKPOINTS 8 +#define RISCV64_MAX_WATCHPOINTS 2 + +#define CONTEXT_UNWOUND_TO_CALL 0x20000000 + +typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { + + // + // Control flags. 
+ // + + /* +0x000 */ DWORD ContextFlags; + + // + // Integer registers + // + DWORD64 R0; + DWORD64 Ra; + DWORD64 Sp; + DWORD64 Gp; + DWORD64 Tp; + DWORD64 T0; + DWORD64 T1; + DWORD64 T2; + DWORD64 Fp; + DWORD64 S1; + DWORD64 A0; + DWORD64 A1; + DWORD64 A2; + DWORD64 A3; + DWORD64 A4; + DWORD64 A5; + DWORD64 A6; + DWORD64 A7; + DWORD64 S2; + DWORD64 S3; + DWORD64 S4; + DWORD64 S5; + DWORD64 S6; + DWORD64 S7; + DWORD64 S8; + DWORD64 S9; + DWORD64 S10; + DWORD64 S11; + DWORD64 T3; + DWORD64 T4; + DWORD64 T5; + DWORD64 T6; + DWORD64 Pc; + + // + // Floating Point Registers + // + //TODO-RISCV64: support the SIMD. + ULONGLONG F[32]; + DWORD Fcsr; +} T_CONTEXT, *PT_CONTEXT; + +// _IMAGE_RISCV64_RUNTIME_FUNCTION_ENTRY (see ExternalAPIs\Win9CoreSystem\inc\winnt.h) +typedef struct _T_RUNTIME_FUNCTION { + DWORD BeginAddress; + union { + DWORD UnwindData; + struct { + DWORD Flag : 2; + DWORD FunctionLength : 11; + DWORD RegF : 3; + DWORD RegI : 4; + DWORD H : 1; + DWORD CR : 2; + DWORD FrameSize : 9; + } PackedUnwindData; + }; +} T_RUNTIME_FUNCTION, *PT_RUNTIME_FUNCTION; + +// +// Define exception dispatch context structure. +// + +typedef struct _T_DISPATCHER_CONTEXT { + DWORD64 ControlPc; + DWORD64 ImageBase; + PT_RUNTIME_FUNCTION FunctionEntry; + DWORD64 EstablisherFrame; + DWORD64 TargetPc; + PCONTEXT ContextRecord; + PEXCEPTION_ROUTINE LanguageHandler; + PVOID HandlerData; + PVOID HistoryTable; + DWORD ScopeIndex; + BOOLEAN ControlPcIsUnwound; + PBYTE NonVolatileRegisters; +} T_DISPATCHER_CONTEXT, *PT_DISPATCHER_CONTEXT; + +// +// Nonvolatile context pointer record. +// + +typedef struct _T_KNONVOLATILE_CONTEXT_POINTERS { + + PDWORD64 S1; + PDWORD64 S2; + PDWORD64 S3; + PDWORD64 S4; + PDWORD64 S5; + PDWORD64 S6; + PDWORD64 S7; + PDWORD64 S8; + PDWORD64 S9; + PDWORD64 S10; + PDWORD64 S11; + PDWORD64 Fp; + PDWORD64 Gp; + PDWORD64 Tp; + PDWORD64 Ra; + + PDWORD64 F8; + PDWORD64 F9; + PDWORD64 F18; + PDWORD64 F19; + PDWORD64 F20; + PDWORD64 F21; + PDWORD64 F22; + PDWORD64 F23; + PDWORD64 F24; + PDWORD64 F25; + PDWORD64 F26; + PDWORD64 F27; +} T_KNONVOLATILE_CONTEXT_POINTERS, *PT_KNONVOLATILE_CONTEXT_POINTERS; + #else #define T_CONTEXT CONTEXT diff --git a/src/coreclr/inc/eetwain.h b/src/coreclr/inc/eetwain.h index 5b20a4ef3a2365..bb5bf893b99ee7 100644 --- a/src/coreclr/inc/eetwain.h +++ b/src/coreclr/inc/eetwain.h @@ -211,9 +211,9 @@ virtual bool UnwindStackFrame(PREGDISPLAY pContext, virtual bool IsGcSafe(EECodeInfo *pCodeInfo, DWORD dwRelOffset) = 0; -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) virtual bool HasTailCalls(EECodeInfo *pCodeInfo) = 0; -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 #if defined(TARGET_AMD64) && defined(_DEBUG) /* @@ -455,10 +455,10 @@ virtual bool IsGcSafe( EECodeInfo *pCodeInfo, DWORD dwRelOffset); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) virtual bool HasTailCalls(EECodeInfo *pCodeInfo); -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || defined(TARGET_RISCV64) #if defined(TARGET_AMD64) && defined(_DEBUG) /* diff --git a/src/coreclr/inc/eexcp.h b/src/coreclr/inc/eexcp.h index 2de046d70dec8a..fb7bccbe073469 100644 --- 
a/src/coreclr/inc/eexcp.h +++ b/src/coreclr/inc/eexcp.h @@ -117,7 +117,7 @@ inline BOOL IsDuplicateClause(EE_ILEXCEPTION_CLAUSE* pEHClause) return pEHClause->Flags & COR_ILEXCEPTION_CLAUSE_DUPLICATED; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Finally is the only EH construct that can be part of the execution as being fall-through. // // "Cloned" finally is a construct that represents a finally block that is used as @@ -139,7 +139,7 @@ inline BOOL IsClonedFinally(EE_ILEXCEPTION_CLAUSE* pEHClause) (pEHClause->TryStartPC == pEHClause->HandlerStartPC) && IsFinally(pEHClause) && IsDuplicateClause(pEHClause)); } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #endif // __eexcp_h__ diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 93bdd33ed3973e..f65cb5846bfbf5 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -216,7 +216,7 @@ enum GcInfoDecoderFlags DECODE_EDIT_AND_CONTINUE = 0x800, DECODE_REVERSE_PINVOKE_VAR = 0x1000, DECODE_RETURN_KIND = 0x2000, -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) DECODE_HAS_TAILCALLS = 0x4000, #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 }; @@ -235,7 +235,7 @@ enum GcInfoHeaderFlags GC_INFO_HAS_STACK_BASE_REGISTER = 0x40, #ifdef TARGET_AMD64 GC_INFO_WANTS_REPORT_ONLY_LEAF = 0x80, -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) GC_INFO_HAS_TAILCALLS = 0x80, #endif // TARGET_AMD64 GC_INFO_HAS_EDIT_AND_CONTINUE_INFO = 0x100, @@ -539,9 +539,9 @@ class GcInfoDecoder bool HasMethodTableGenericsInstContext(); bool GetIsVarArg(); bool WantsReportOnlyLeaf(); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool HasTailCalls(); -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || defined(TARGET_RISCV64) ReturnKind GetReturnKind(); UINT32 GetCodeLength(); UINT32 GetStackBaseRegister(); @@ -567,7 +567,7 @@ class GcInfoDecoder bool m_GenericSecretParamIsMT; #ifdef TARGET_AMD64 bool m_WantsReportOnlyLeaf; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool m_HasTailCalls; #endif // TARGET_AMD64 INT32 m_GSCookieStackSlot; diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h index 490aacf8ede25c..ea71e14d8dbfe0 100644 --- a/src/coreclr/inc/gcinfoencoder.h +++ b/src/coreclr/inc/gcinfoencoder.h @@ -442,7 +442,7 @@ class GcInfoEncoder // instead of once for each live function/funclet on the stack. 
// Called only by RyuJIT (not JIT64) void SetWantsReportOnlyLeaf(); -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void SetHasTailCalls(); #endif // TARGET_AMD64 @@ -494,7 +494,7 @@ class GcInfoEncoder bool m_IsVarArg; #if defined(TARGET_AMD64) bool m_WantsReportOnlyLeaf; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool m_HasTailCalls; #endif // TARGET_AMD64 INT32 m_GSCookieStackSlot; diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index 57cae4d264b719..33759d14d949ef 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -156,7 +156,7 @@ struct GcStackSlot // 10 RT_ByRef // 11 RT_Unset -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Slim Header: @@ -829,6 +829,63 @@ void FASTCALL decodeCallPattern(int pattern, #define LIVESTATE_RLE_RUN_ENCBASE 2 #define LIVESTATE_RLE_SKIP_ENCBASE 4 +#elif defined(TARGET_RISCV64) +#ifndef TARGET_POINTER_SIZE +#define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target +#endif +#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64) +#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6) +#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned +#define DENORMALIZE_STACK_SLOT(x) ((x)<<3) +#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long +#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2) +#define NORMALIZE_STACK_BASE_REGISTER(x) ((x)^8) // Encode Frame pointer X8 as zero +#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x)^8) +#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3) +#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3) +#define CODE_OFFSETS_NEED_NORMALIZATION 0 +#define NORMALIZE_CODE_OFFSET(x) (x) // Instructions are 4 bytes long, but the safe-point +#define DENORMALIZE_CODE_OFFSET(x) (x) // offsets are encoded with a -1 adjustment. +#define NORMALIZE_REGISTER(x) (x) +#define DENORMALIZE_REGISTER(x) (x) +#define NORMALIZE_NUM_SAFE_POINTS(x) (x) +#define DENORMALIZE_NUM_SAFE_POINTS(x) (x) +#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) +#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x) + +#define PSP_SYM_STACK_SLOT_ENCBASE 6 +#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6 +#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6 +#define GS_COOKIE_STACK_SLOT_ENCBASE 6 +#define CODE_LENGTH_ENCBASE 8 +#define SIZE_OF_RETURN_KIND_IN_SLIM_HEADER 2 +#define SIZE_OF_RETURN_KIND_IN_FAT_HEADER 4 +#define STACK_BASE_REGISTER_ENCBASE 2 // TODO for RISCV64 +// FP encoded as 0, SP as 2?? 
+#define SIZE_OF_STACK_AREA_ENCBASE 3 +#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4 +#define SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE 4 +#define REVERSE_PINVOKE_FRAME_ENCBASE 6 +#define NUM_REGISTERS_ENCBASE 3 +#define NUM_STACK_SLOTS_ENCBASE 2 +#define NUM_UNTRACKED_SLOTS_ENCBASE 1 +#define NORM_PROLOG_SIZE_ENCBASE 5 +#define NORM_EPILOG_SIZE_ENCBASE 3 +#define NORM_CODE_OFFSET_DELTA_ENCBASE 3 +#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6 +#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6 +#define REGISTER_ENCBASE 3 +#define REGISTER_DELTA_ENCBASE 2 +#define STACK_SLOT_ENCBASE 6 +#define STACK_SLOT_DELTA_ENCBASE 4 +#define NUM_SAFE_POINTS_ENCBASE 3 +#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1 +#define NUM_EH_CLAUSES_ENCBASE 2 +#define POINTER_SIZE_ENCBASE 3 +#define LIVESTATE_RLE_RUN_ENCBASE 2 +#define LIVESTATE_RLE_SKIP_ENCBASE 4 + + #else #ifndef TARGET_X86 diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 601588f98167e8..11cd8e36b8283b 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -325,7 +325,7 @@ JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB) -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, CORINFO_HELP_SIG_REG_ONLY) #else JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF) diff --git a/src/coreclr/inc/pedecoder.h b/src/coreclr/inc/pedecoder.h index 17baa6483079cb..4ada5241163b7c 100644 --- a/src/coreclr/inc/pedecoder.h +++ b/src/coreclr/inc/pedecoder.h @@ -87,6 +87,8 @@ inline CHECK CheckOverflow(RVA value1, COUNT_T value2) #define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_POWERPC #elif defined(TARGET_S390X) #define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_UNKNOWN +#elif defined(TARGET_RISCV64) +#define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_RISCV64 #else #error "port me" #endif diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index b6ccc87ff6d876..e7a230d2dab834 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -210,6 +210,28 @@ typedef struct _Loongarch64VolatileContextPointer } Loongarch64VolatileContextPointer; #endif +#if defined(TARGET_RISCV64) +typedef struct _Riscv64VolatileContextPointer +{ + PDWORD64 R0; + PDWORD64 A0; + PDWORD64 A1; + PDWORD64 A2; + PDWORD64 A3; + PDWORD64 A4; + PDWORD64 A5; + PDWORD64 A6; + PDWORD64 A7; + PDWORD64 T0; + PDWORD64 T1; + PDWORD64 T2; + PDWORD64 T3; + PDWORD64 T4; + PDWORD64 T5; + PDWORD64 T6; +} Riscv64VolatileContextPointer; +#endif + struct REGDISPLAY : public REGDISPLAY_BASE { #ifdef TARGET_ARM64 Arm64VolatileContextPointer volatileCurrContextPointers; @@ -219,6 +241,10 @@ struct REGDISPLAY : public REGDISPLAY_BASE { Loongarch64VolatileContextPointer volatileCurrContextPointers; #endif +#ifdef TARGET_RISCV64 + Riscv64VolatileContextPointer volatileCurrContextPointers; +#endif + REGDISPLAY() { // Initialize @@ -329,6 +355,8 @@ inline LPVOID GetRegdisplayReturnValue(REGDISPLAY *display) return (LPVOID)display->pCurrentContext->Eax; #elif defined(TARGET_LOONGARCH64) return (LPVOID)display->pCurrentContext->A0; +#elif defined(TARGET_RISCV64) + return (LPVOID)display->pCurrentContext->A0; #else PORTABILITY_ASSERT("GetRegdisplayReturnValue NYI for this platform (Regdisp.h)"); return NULL; @@ -397,7 +425,24 @@ inline void FillContextPointers(PT_KNONVOLATILE_CONTEXT_POINTERS pCtxPtrs, PT_CO { *(&pCtxPtrs->Edi + i) = 
(&pCtx->Edi + i); } -#else // TARGET_X86 +#elif defined(TARGET_RISCV64) // TARGET_X86 + // *(&pCtxPtrs->S0) = &pCtx->S0; + *(&pCtxPtrs->S1) = &pCtx->S1; + *(&pCtxPtrs->S2) = &pCtx->S2; + *(&pCtxPtrs->S3) = &pCtx->S3; + *(&pCtxPtrs->S4) = &pCtx->S4; + *(&pCtxPtrs->S5) = &pCtx->S5; + *(&pCtxPtrs->S6) = &pCtx->S6; + *(&pCtxPtrs->S7) = &pCtx->S7; + *(&pCtxPtrs->S8) = &pCtx->S8; + *(&pCtxPtrs->S9) = &pCtx->S9; + *(&pCtxPtrs->S10) = &pCtx->S10; + *(&pCtxPtrs->S11) = &pCtx->S11; + *(&pCtxPtrs->Gp) = &pCtx->Gp; + *(&pCtxPtrs->Tp) = &pCtx->Tp; + *(&pCtxPtrs->Fp) = &pCtx->Fp; + *(&pCtxPtrs->Ra) = &pCtx->Ra; +#else // TARGET_RISCV64 PORTABILITY_ASSERT("FillContextPointers"); #endif // _TARGET_???_ (ELSE) } @@ -488,7 +533,23 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC pRD->volatileCurrContextPointers.T7 = &pctx->T7; pRD->volatileCurrContextPointers.T8 = &pctx->T8; pRD->volatileCurrContextPointers.X0 = &pctx->X0; -#endif // TARGET_LOONGARCH64 +#elif defined(TARGET_RISCV64) // TARGET_LOONGARCH64 + pRD->volatileCurrContextPointers.A0 = &pctx->A0; + pRD->volatileCurrContextPointers.A1 = &pctx->A1; + pRD->volatileCurrContextPointers.A2 = &pctx->A2; + pRD->volatileCurrContextPointers.A3 = &pctx->A3; + pRD->volatileCurrContextPointers.A4 = &pctx->A4; + pRD->volatileCurrContextPointers.A5 = &pctx->A5; + pRD->volatileCurrContextPointers.A6 = &pctx->A6; + pRD->volatileCurrContextPointers.A7 = &pctx->A7; + pRD->volatileCurrContextPointers.T0 = &pctx->T0; + pRD->volatileCurrContextPointers.T1 = &pctx->T1; + pRD->volatileCurrContextPointers.T2 = &pctx->T2; + pRD->volatileCurrContextPointers.T3 = &pctx->T3; + pRD->volatileCurrContextPointers.T4 = &pctx->T4; + pRD->volatileCurrContextPointers.T5 = &pctx->T5; + pRD->volatileCurrContextPointers.T6 = &pctx->T6; +#endif // TARGET_RISCV64 #ifdef DEBUG_REGDISPLAY pRD->_pThread = NULL; @@ -571,6 +632,9 @@ inline size_t * getRegAddr (unsigned regNum, PTR_CONTEXT regs) #elif defined(TARGET_LOONGARCH64) _ASSERTE(regNum < 32); return (size_t *)®s->R0 + regNum; +#elif defined(TARGET_RISCV64) + _ASSERTE(regNum < 32); + return (size_t *)®s->R0 + regNum; #else _ASSERTE(!"@TODO Port - getRegAddr (Regdisp.h)"); #endif diff --git a/src/coreclr/inc/switches.h b/src/coreclr/inc/switches.h index 65748b46099089..cb3d05b83d4c0b 100644 --- a/src/coreclr/inc/switches.h +++ b/src/coreclr/inc/switches.h @@ -53,7 +53,7 @@ #if defined(TARGET_X86) || defined(TARGET_ARM) #define USE_LAZY_PREFERRED_RANGE 0 -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64) || defined(TARGET_RISCV64) #if defined(HOST_UNIX) // In PAL we have a smechanism that reserves memory on start up that is diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h index 9025a8608af0fc..fbe7806cf7d1c3 100644 --- a/src/coreclr/inc/targetosarch.h +++ b/src/coreclr/inc/targetosarch.h @@ -42,6 +42,7 @@ class TargetArchitecture static const bool IsArm32 = true; static const bool IsArmArch = true; static const bool IsLoongArch64 = false; + static const bool IsRiscv64 = false; #elif defined(TARGET_ARM64) static const bool IsX86 = false; static const bool IsX64 = false; @@ -49,6 +50,7 @@ class TargetArchitecture static const bool IsArm32 = false; static const bool IsArmArch = true; static const bool IsLoongArch64 = false; + static const bool IsRiscv64 = 
false; #elif defined(TARGET_AMD64) static const bool IsX86 = false; static const bool IsX64 = true; @@ -56,6 +58,7 @@ class TargetArchitecture static const bool IsArm32 = false; static const bool IsArmArch = false; static const bool IsLoongArch64 = false; + static const bool IsRiscv64 = false; #elif defined(TARGET_X86) static const bool IsX86 = true; static const bool IsX64 = false; @@ -63,6 +66,7 @@ class TargetArchitecture static const bool IsArm32 = false; static const bool IsArmArch = false; static const bool IsLoongArch64 = false; + static const bool IsRiscv64 = false; #elif defined(TARGET_LOONGARCH64) static const bool IsX86 = false; static const bool IsX64 = false; @@ -70,6 +74,15 @@ class TargetArchitecture static const bool IsArm32 = false; static const bool IsArmArch = false; static const bool IsLoongArch64 = true; + static const bool IsRiscv64 = false; +#elif defined(TARGET_RISCV64) + static const bool IsX86 = false; + static const bool IsX64 = false; + static const bool IsArm64 = false; + static const bool IsArm32 = false; + static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; + static const bool IsRiscv64 = true; #else #error Unknown architecture #endif diff --git a/src/coreclr/inc/volatile.h b/src/coreclr/inc/volatile.h index 177c4932166c69..030638c2ef2a9a 100644 --- a/src/coreclr/inc/volatile.h +++ b/src/coreclr/inc/volatile.h @@ -68,8 +68,8 @@ #error The Volatile type is currently only defined for Visual C++ and GNU C++ #endif -#if defined(__GNUC__) && !defined(HOST_X86) && !defined(HOST_AMD64) && !defined(HOST_ARM) && !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64) && !defined(HOST_S390X) && !defined(HOST_POWERPC64) -#error The Volatile type is currently only defined for GCC when targeting x86, AMD64, ARM, ARM64, LOONGARCH64, RISCV64, PPC64LE, or S390X CPUs +#if defined(__GNUC__) && !defined(HOST_X86) && !defined(HOST_AMD64) && !defined(HOST_ARM) && !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64) && !defined(HOST_S390X) && !defined(HOST_POWERPC64) && !defined(HOST_RISCV64) +#error The Volatile type is currently only defined for GCC when targeting x86, AMD64, ARM, ARM64, LOONGARCH64, RISCV64, PPC64LE, S390X, or RISCV64 CPUs #endif #if defined(__GNUC__) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index a3d65680ba0651..19e416aed9e10f 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -54,6 +54,9 @@ function(create_standalone_jit) elseif(TARGETDETAILS_ARCH STREQUAL "loongarch64") set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) + elseif(TARGETDETAILS_ARCH STREQUAL "riscv64") + set(JIT_ARCH_SOURCES ${JIT_RISCV64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_RISCV64_HEADERS}) else() clr_unknown_arch() endif() @@ -258,6 +261,15 @@ set( JIT_LOONGARCH64_SOURCES unwindloongarch64.cpp ) +set( JIT_RISCV64_SOURCES + codegenriscv64.cpp + emitriscv64.cpp + lowerriscv64.cpp + lsrariscv64.cpp + targetriscv64.cpp + unwindriscv64.cpp +) + # We include the headers here for better experience in IDEs. 
set( JIT_HEADERS ../inc/corinfo.h @@ -415,6 +427,13 @@ set( JIT_LOONGARCH64_HEADERS registerloongarch64.h ) +set( JIT_RISCV64_HEADERS + emitriscv64.h + emitfmtsriscv64.h + instrsriscv64.h + registerriscv64.h +) + convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) convert_to_absolute_path(JIT_HEADERS ${JIT_HEADERS}) convert_to_absolute_path(JIT_RESOURCES ${JIT_RESOURCES}) @@ -435,6 +454,8 @@ convert_to_absolute_path(JIT_S390X_SOURCES ${JIT_S390X_SOURCES}) convert_to_absolute_path(JIT_S390X_HEADERS ${JIT_S390X_HEADERS}) convert_to_absolute_path(JIT_LOONGARCH64_SOURCES ${JIT_LOONGARCH64_SOURCES}) convert_to_absolute_path(JIT_LOONGARCH64_HEADERS ${JIT_LOONGARCH64_HEADERS}) +convert_to_absolute_path(JIT_RISCV64_SOURCES ${JIT_RISCV64_SOURCES}) +convert_to_absolute_path(JIT_RISCV64_HEADERS ${JIT_RISCV64_HEADERS}) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(JIT_ARCH_SOURCES ${JIT_AMD64_SOURCES}) @@ -461,6 +482,8 @@ elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) + set(JIT_ARCH_SOURCES ${JIT_RISCV64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_RISCV64_HEADERS}) else() clr_unknown_arch() endif() @@ -590,11 +613,6 @@ else() set(TARGET_OS_NAME win) endif() -if (NOT CLR_CMAKE_TARGET_ARCH_RISCV64) - create_standalone_jit(TARGET clrjit OS ${TARGET_OS_NAME} ARCH ${ARCH_TARGET_NAME} DESTINATIONS . sharedFramework) - install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) -endif() - # Enable profile guided optimization add_pgo(clrjit) @@ -608,22 +626,26 @@ if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) -create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) -target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) -create_standalone_jit(TARGET clrjit_win_x86_${ARCH_HOST_NAME} OS win ARCH x86 DESTINATIONS .) +if (CLR_CMAKE_TARGET_ARCH_RISCV64) + create_standalone_jit(TARGET clrjit_unix_riscv64_${ARCH_HOST_NAME} OS unix ARCH riscv64 DESTINATIONS .) +else() + create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) + target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) + create_standalone_jit(TARGET clrjit_win_x86_${ARCH_HOST_NAME} OS win ARCH x86 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_RISCV64) if (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX) create_standalone_jit(TARGET clrjit_unix_x86_${ARCH_HOST_NAME} OS unix ARCH x86 DESTINATIONS .) endif (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX) -if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_RISCV64) - if (NOT ARCH_TARGET_NAME STREQUAL s390x AND NOT ARCH_TARGET_NAME STREQUAL armv6 AND NOT ARCH_TARGET_NAME STREQUAL ppc64le) +if (CLR_CMAKE_TARGET_UNIX) + if (NOT ARCH_TARGET_NAME STREQUAL s390x AND NOT ARCH_TARGET_NAME STREQUAL armv6 AND NOT ARCH_TARGET_NAME STREQUAL ppc64le AND NOT ARCH_TARGET_NAME STREQUAL riscv64) if(CLR_CMAKE_TARGET_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_ARM64) install_clr(TARGETS clrjit_universal_${ARCH_TARGET_NAME}_${ARCH_HOST_NAME} DESTINATIONS . COMPONENT jit) else() install_clr(TARGETS clrjit_unix_${ARCH_TARGET_NAME}_${ARCH_HOST_NAME} DESTINATIONS . 
COMPONENT jit) endif() - endif(NOT ARCH_TARGET_NAME STREQUAL s390x AND NOT ARCH_TARGET_NAME STREQUAL armv6 AND NOT ARCH_TARGET_NAME STREQUAL ppc64le) + endif(NOT ARCH_TARGET_NAME STREQUAL s390x AND NOT ARCH_TARGET_NAME STREQUAL armv6 AND NOT ARCH_TARGET_NAME STREQUAL ppc64le AND NOT ARCH_TARGET_NAME STREQUAL riscv64) endif() if (CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_PGO_INSTRUMENT) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index c75ae055e52d76..c86eabcde01632 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -233,7 +233,7 @@ class CodeGen final : public CodeGenInterface void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk = nullptr); -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, @@ -257,7 +257,7 @@ class CodeGen final : public CodeGenInterface // void genEstablishFramePointer(int delta, bool reportUnwindData); -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genFnPrologCalleeRegArgs(); #else void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState); @@ -273,7 +273,7 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, emitAttr attr, regNumber reg1, @@ -437,7 +437,27 @@ class CodeGen final : public CodeGenInterface }; FuncletFrameInfoDsc genFuncletInfo; -#endif // TARGET_LOONGARCH64 + +#elif defined(TARGET_RISCV64) + + // A set of information that is used by funclet prolog and epilog generation. + // It is collected once, before funclet prologs and epilogs are generated, + // and used by all funclet prologs and epilogs, which must all be the same. + struct FuncletFrameInfoDsc + { + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function + // (negative) + int fiSP_to_FPRA_save_delta; // FP/RA register save offset from SP (positive) + int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) + int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) + int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. 
+ int fiSpDelta1; // Stack pointer delta 1 (negative) + }; + + FuncletFrameInfoDsc genFuncletInfo; + +#endif // TARGET_ARM, TARGET_ARM64, TARGET_AMD64, TARGET_LOONGARCH64, TARGET_RISCV64 #if defined(TARGET_XARCH) @@ -780,9 +800,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTree* tree); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale); -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 #if defined(TARGET_ARMARCH) void genCodeForMulLong(GenTreeOp* mul); @@ -1183,7 +1203,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genTableBasedSwitch(GenTree* tree); void genCodeForArrIndex(GenTreeArrIndex* treeNode); void genCodeForArrOffset(GenTreeArrOffs* treeNode); -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) instruction genGetInsForOper(GenTree* treeNode); #else instruction genGetInsForOper(genTreeOps oper, var_types type); @@ -1195,7 +1215,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackArgBytes)); void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO: refactor for LA. void genCodeForJumpCompare(GenTreeOp* tree); #endif @@ -1229,9 +1249,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genSimpleReturn(GenTree* treeNode); -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 void genReturn(GenTree* treeNode); @@ -1541,7 +1561,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX static instruction JumpKindToCmov(emitJumpKind condition); #endif -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions // such as X86's SETcc. A sequence of instructions rather than just a single one is required for // certain floating point conditions.
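The riscv64 hunks in this file and in the new JIT sources further down repeatedly bound stack offsets by emitter::isValidSimm12 and by constants such as -2048 and 2031 (2047-16). Those bounds follow from the 12-bit signed immediate field of RISC-V I-type and S-type instructions (addi, ld, sd, ...). A minimal sketch of the predicate being assumed; the real helper lives in the new emitriscv64 sources, so the name and signature here are only illustrative:

static bool isValidSimm12(ssize_t imm)
{
    // RISC-V I-type and S-type immediates are 12-bit signed values: [-2048, 2047]
    return (imm >= -2048) && (imm <= 2047);
}

Offsets outside this range take the multi-instruction path: the constant is materialized into a temporary register and added to the base register, which is what genInstrWithConstant in the new codegenriscv64.cpp does.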
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 3c003a3e499818..da07a348254514 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -135,9 +135,9 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) /* Assume that we not fully interruptible */ SetInterruptible(false); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) SetHasTailCalls(false); -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 #ifdef DEBUG genInterruptibleUsed = false; genCurDispOffset = (unsigned)-1; @@ -793,7 +793,11 @@ template void Compiler::compChangeLife(VARSET_VALARG_TP newLife); */ void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, REG_NA, tmp->tdTempNum(), 0); +#else GetEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0); +#endif } /***************************************************************************** @@ -1118,7 +1122,7 @@ bool CodeGen::genCreateAddrMode( cns += op2->AsIntConCommon()->IconValue(); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (cns == 0) #endif { @@ -1138,7 +1142,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_MUL: if (op1->gtOverflow()) @@ -1161,7 +1165,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) default: break; @@ -1182,7 +1186,7 @@ bool CodeGen::genCreateAddrMode( switch (op1->gtOper) { -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_ADD: @@ -1244,7 +1248,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64 case GT_NOP: @@ -1263,7 +1267,7 @@ bool CodeGen::genCreateAddrMode( noway_assert(op2); switch (op2->gtOper) { -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. Offset is handled // at the emitter as a peephole optimization. @@ -1323,7 +1327,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 case GT_NOP: @@ -1553,7 +1557,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi * have set the flags. Check if the operation caused an overflow. 
*/ -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // inline void CodeGen::genCheckOverflow(GenTree* tree) { @@ -1592,6 +1596,9 @@ void CodeGen::genCheckOverflow(GenTree* tree) jumpKind = EJ_hs; } } +#elif defined(TARGET_RISCV64) + _ASSERTE(!"TODO RISCV64"); + jumpKind = EJ_NONE; // TODO RISCV64 #endif // defined(TARGET_ARMARCH) } @@ -1762,6 +1769,10 @@ void CodeGen::genGenerateMachineCode() { printf("generic LOONGARCH64 CPU"); } + else if (compiler->info.genCPU == CPU_RISCV64) + { + printf("generic RISCV64 CPU"); + } else { printf("unknown architecture"); @@ -1968,7 +1979,7 @@ void CodeGen::genEmitMachineCode() bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) trackedStackPtrsContig = false; #elif defined(TARGET_ARM) // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous @@ -2828,7 +2839,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -3539,7 +3550,11 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // Since slot is typically 1, baseOffset is typically 0 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, REG_NA, varNum, baseOffset); +#else GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); +#endif #ifndef UNIX_AMD64_ABI // Check if we are writing past the end of the struct @@ -4086,7 +4101,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } -#endif // !TARGET_LOONGARCH64 +#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef _PREFAST_ #pragma warning(pop) @@ -4112,7 +4127,7 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) int tmp_offset = 0; regNumber tmp_reg = REG_NA; #endif @@ -4191,6 +4206,34 @@ void CodeGen::genEnregisterIncomingStackArgs() } } } +#elif defined(TARGET_RISCV64) + { + bool FPbased; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + + if (emitter::isValidSimm12(base)) + { + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_RA; // TODO CHECK R21 => RA + + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_RA, base); // TODO CHECK R21 => RA + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, REG_RA, REG_RA, reg2); // TODO CHECK R21 => RA + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); + } + else + { + int baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset); + } + } + } #else // !TARGET_LOONGARCH64 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); #endif // !TARGET_LOONGARCH64 @@ -4488,6 +4531,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_LOONGARCH64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); +#elif defined(TARGET_RISCV64) + _ASSERTE(!"TODO RISCV64 NYI"); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -4525,6 +4570,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); #elif defined(TARGET_LOONGARCH64) GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); +#elif defined(TARGET_RISCV64) + _ASSERTE(!"TODO RISCV64 NYI"); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -4542,6 +4589,8 @@ regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) return REG_ZR; #elif defined(TARGET_LOONGARCH64) return REG_R0; +#elif defined(TARGET_RISCV64) + return REG_R0; #else // !TARGET_ARM64 if (*pInitRegZeroed == false) { @@ -4616,7 +4665,11 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, if (layout->IsGCPtr(i)) { GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, +#ifdef TARGET_RISCV64 + genGetZeroReg(initReg, pInitRegZeroed), REG_NA, varNum, i * REGSIZE_BYTES); +#else genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES); +#endif } } } @@ -4629,14 +4682,22 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, unsigned i; for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, REG_NA, varNum, i); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i); +#endif } #ifdef TARGET_64BIT assert(i == lclSize || (i + sizeof(int) == lclSize)); if (i != lclSize) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, REG_NA, varNum, i); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i); +#endif i += sizeof(int); } #endif // TARGET_64BIT @@ -4952,11 +5013,14 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed #elif defined(TARGET_LOONGARCH64) genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset(), REG_R21); -#else // !ARM64 !ARM !LOONGARCH64 +#elif defined(TARGET_RISCV64) + genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), + compiler->lvaCachedGenericContextArgOffset(), REG_RA); +#else // !ARM64 !ARM !LOONGARCH64 !RISCV64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, 
genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#endif // !ARM64 !ARM !LOONGARCH64 +#endif // !ARM64 !ARM !LOONGARCH64 !RISCV64 } /***************************************************************************** @@ -5341,7 +5405,7 @@ void CodeGen::genFinalizeFrame() maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; #endif // defined(TARGET_XARCH) -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (isFramePointerUsed()) { // For a FP based frame we have to push/pop the FP register @@ -5356,7 +5420,7 @@ void CodeGen::genFinalizeFrame() // we always push RA. See genPushCalleeSavedRegisters maskCalleeRegsPushed |= RBM_RA; -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); @@ -5472,10 +5536,10 @@ void CodeGen::genFnProlog() instGen(INS_nop); instGen(INS_BREAKPOINT); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Avoid asserts in the unwind info because these instructions aren't accounted for. compiler->unwindPadding(); -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 } #endif // DEBUG @@ -5878,16 +5942,16 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 if (!isOSRx64Root) { genPushCalleeSavedRegisters(); } -#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef TARGET_ARM bool needToEstablishFP = false; @@ -5918,7 +5982,7 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) regMaskTP maskStackAlloc = RBM_NONE; #ifdef TARGET_ARM @@ -5931,7 +5995,7 @@ void CodeGen::genFnProlog() genAllocLclFrame(compiler->compLclFrameSize + extraFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); } -#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef TARGET_AMD64 // For x64 OSR we have to finish saving int callee saves. 
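One recurring pattern in this file's hunks, before and after this point: every existing emitIns_S_R call site gains a TARGET_RISCV64 variant with one extra register argument threaded through as REG_NA, e.g.

// other targets
GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, varNum, 0);
// TARGET_RISCV64 -- the extra argument is presumably a scratch register for frame
// offsets that do not fit a 12-bit immediate; REG_NA leaves the choice to the emitter.
// That reading is an assumption: the RISC-V emitIns_S_R declaration is not part of this diff.
GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, REG_NA, varNum, 0);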
@@ -5983,7 +6047,11 @@ void CodeGen::genFnProlog() if (compiler->info.compPublishStubParam) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, REG_NA, +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, +#endif compiler->lvaStubArgumentVar, 0); assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM); @@ -6018,8 +6086,13 @@ void CodeGen::genFnProlog() initRegZeroed = true; } +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, REG_NA, compiler->lvaShadowSPslotsVar, + firstSlotOffs); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar, firstSlotOffs); +#endif } #endif // !FEATURE_EH_FUNCLETS @@ -6030,7 +6103,11 @@ void CodeGen::genFnProlog() // Initialize the LocalAllocSP slot if there is localloc in the function. if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, REG_NA, compiler->lvaLocAllocSPvar, 0); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); +#endif } #endif // JIT32_GCENCODER @@ -6106,7 +6183,7 @@ void CodeGen::genFnProlog() { compiler->lvaUpdateArgsWithInitialReg(); -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn) { initRegZeroed = false; @@ -6149,7 +6226,7 @@ void CodeGen::genFnProlog() assignIncomingRegisterArgs(&intRegState); #endif -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 // Home the incoming arguments. genEnregisterIncomingStackArgs(); @@ -6276,7 +6353,11 @@ void CodeGen::genFnProlog() } else { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_NA, argsStartVar, 0); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0); +#endif } } @@ -6288,7 +6369,11 @@ void CodeGen::genFnProlog() assert(compiler->lvaReturnSpCheck != BAD_VAR_NUM); assert(compiler->lvaGetDesc(compiler->lvaReturnSpCheck)->lvDoNotEnregister); assert(compiler->lvaGetDesc(compiler->lvaReturnSpCheck)->lvOnFrame); +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, REG_NA, compiler->lvaReturnSpCheck, 0); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0); +#endif } #endif // defined(DEBUG) && defined(TARGET_XARCH) @@ -6467,7 +6552,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -6546,7 +6631,7 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass) // unsigned CodeGen::getFirstArgWithStackSlot() { -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) unsigned baseVarNum = 0; // Iterate over all the lvParam variables in the Lcl 
var table until we find the first one // that's passed on the stack. @@ -7730,9 +7815,9 @@ void CodeGen::genReturn(GenTree* treeNode) // exit point where it is actually dead. genConsumeReg(op1); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) genSimpleReturn(treeNode); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 || !TARGET_RISCV64 #if defined(TARGET_X86) if (varTypeUsesFloatReg(treeNode)) { @@ -7760,7 +7845,7 @@ void CodeGen::genReturn(GenTree* treeNode) regNumber retReg = varTypeUsesFloatReg(treeNode) ? REG_FLOATRET : REG_INTRET; inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true); } -#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 || !TARGET_RISCV64 } } @@ -7920,7 +8005,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }", // i. e. not include the padding for the first field, and so the general loop below won't work. var_types type = retTypeDesc.GetReturnRegType(0); @@ -7935,7 +8020,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } -#else // !TARGET_LOONGARCH64 +#else // !TARGET_LOONGARCH64 && !TARGET_RISCV64 int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -7944,7 +8029,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } -#endif // !TARGET_LOONGARCH64 +#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64 } else { @@ -8107,21 +8192,29 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) { // A byte field passed in a long register should be written on the stack as a byte. instruction storeIns = ins_StoreFromSrc(reg, destType); +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(storeIns, emitTypeSize(destType), reg, REG_NA, fieldLclNum, 0); +#else GetEmitter()->emitIns_S_R(storeIns, emitTypeSize(destType), reg, fieldLclNum, 0); +#endif } } fieldVarDsc->SetRegNum(varReg); } else { -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // should consider the padding field within a struct. offset = (offset % genTypeSize(srcType)) ? AlignUp(offset, genTypeSize(srcType)) : offset; #endif // Several fields could be passed in one register, copy using the register type. // It could rewrite memory outside of the fields but local on the stack are rounded to POINTER_SIZE so // it is safe to store a long register into a byte field as it is known that we have enough padding after. 
+#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(srcType), emitTypeSize(srcType), reg, REG_NA, lclNum, offset); +#else GetEmitter()->emitIns_S_R(ins_Store(srcType), emitTypeSize(srcType), reg, lclNum, offset); +#endif offset += genTypeSize(srcType); #ifdef DEBUG @@ -8387,7 +8480,11 @@ void CodeGen::genStackPointerCheck(bool doStackPointerCheck, assert(regTmp != REG_NA); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, regTmp, REG_SPBASE, /* canSkip */ false); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, regTmp, offset); +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regTmp, REG_NA, lvaStackPointerVar, 0); +#else GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regTmp, lvaStackPointerVar, 0); +#endif } else { @@ -9300,14 +9397,22 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn) #ifdef TARGET_64BIT if ((offs % 8) == 0 && end - offs >= 8) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, REG_NA, (int)varNum, offs - addr); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr); +#endif offs += 8; continue; } #endif assert((offs % 4) == 0 && end - offs >= 4); +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, REG_NA, (int)varNum, offs - addr); +#else GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr); +#endif offs += 4; } } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 0563f4fc26edc9..86bc7a7573adf5 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -118,7 +118,7 @@ class CodeGenInterface private: #if defined(TARGET_XARCH) static const insFlags instInfo[INS_count]; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture @@ -364,7 +364,7 @@ class CodeGenInterface m_cgInterruptible = value; } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool GetHasTailCalls() { @@ -374,13 +374,13 @@ class CodeGenInterface { m_cgHasTailCalls = value; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 private: bool m_cgInterruptible; -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool m_cgHasTailCalls; -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 // The following will be set to true if we've determined that we need to // generate a full-blown pointer register map for the current method. diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 3051aec3b67f10..b5a9fb37cfdc7e 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1214,7 +1214,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(spillType != TYP_UNDEF); // TODO-Cleanup: The following code could probably be further merged and cleaned up. 
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Load local variable from its home location. // Never allow truncating the locals here, otherwise a subsequent // use of the local with a wider type would see the truncated @@ -1869,7 +1869,11 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg #endif // FEATURE_SIMD { emitAttr attr = emitTypeSize(type); +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins_Store(type), attr, reg, REG_NA, outArgVarNum, thisFieldOffset); +#else GetEmitter()->emitIns_S_R(ins_Store(type), attr, reg, outArgVarNum, thisFieldOffset); +#endif } // We can't write beyond the arg area unless this is a tail call, in which case we use @@ -2061,7 +2065,12 @@ void CodeGen::genSpillLocal(unsigned varNum, var_types type, GenTreeLclVar* lclN { // Store local variable to its home location. // Ensure that lclVar stores are typed correctly. +#ifdef TARGET_RISCV64 + assert(!compiler->isSIMDTypeLocalAligned(varNum)); + GetEmitter()->emitIns_S_R(ins_Store(type, compiler->isSIMDTypeLocalAligned(varNum)), emitTypeSize(type), regNum, REG_NA, +#else GetEmitter()->emitIns_S_R(ins_Store(type, compiler->isSIMDTypeLocalAligned(varNum)), emitTypeSize(type), regNum, +#endif varNum, 0); } } @@ -2477,7 +2486,7 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) m_checkKind = CHECK_NONE; } -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need // the upper 32bits be sign-extended to 64 bits. m_extendKind = SIGN_EXTEND_INT; @@ -2582,12 +2591,17 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) noway_assert((loVal->GetRegNum() != REG_NA) && (hiVal->GetRegNum() != REG_NA)); +#ifdef TARGET_RISCV64 + emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->GetRegNum(), REG_NA, lclNum, 0); + emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->GetRegNum(), REG_NA, lclNum, genTypeSize(TYP_INT)); +#else emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->GetRegNum(), lclNum, 0); emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->GetRegNum(), lclNum, genTypeSize(TYP_INT)); +#endif } #endif // !defined(TARGET_64BIT) -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) //------------------------------------------------------------------------ // genCodeForJcc: Generate code for a GT_JCC node. @@ -2645,4 +2659,4 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) inst_SETCC(setcc->gtCondition, setcc->TypeGet(), setcc->GetRegNum()); genProduceReg(setcc); } -#endif // !TARGET_LOONGARCH64 +#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64 diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp new file mode 100644 index 00000000000000..d2a04d8b40edc5 --- /dev/null +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -0,0 +1,7787 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX RISCV64 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_RISCV64 +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" + +bool CodeGen::genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion /* = false */) +{ + emitAttr size = EA_SIZE(attr); + + // reg1 is usually a dest register + // reg2 is always a source register + assert(tmpReg != reg2); // tmpReg can not match any source register + +#ifdef DEBUG + switch (ins) + { + case INS_addi: + + case INS_sb: + case INS_sh: + case INS_sw: + case INS_fsw: + case INS_sd: + case INS_fsd: + + case INS_lb: + case INS_lh: + case INS_lw: + case INS_flw: + case INS_ld: + case INS_fld: + break; + + default: + assert(!"Unexpected instruction in genInstrWithConstant"); + break; + } +#endif + bool immFitsInIns = emitter::isValidSimm12(imm); + + if (immFitsInIns) + { + // generate a single instruction that encodes the immediate directly + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit + assert(tmpReg != REG_NA); + + // generate two or more instructions + + // first we load the immediate into tmpReg + assert(!EA_IS_RELOC(size)); + GetEmitter()->emitIns_I_la(size, tmpReg, imm); + regSet.verifyRegUsed(tmpReg); + + // when we are in an unwind code region + // we record the extra instructions using unwindPadding() + if (inUnwindRegion) + { + compiler->unwindPadding(); + } + + if (ins == INS_addi) + { + GetEmitter()->emitIns_R_R_R(INS_add, attr, reg1, reg2, tmpReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add, attr, tmpReg, reg2, tmpReg); + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, tmpReg, 0); + } + } + return immFitsInIns; +} + +void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) +{ + // Even though INS_addi is specified here, genInstrWithConstant will either encode the + // immediate directly in a single INS_addi, or materialize it into tmpReg and use INS_add + // + bool wasTempRegisterUsedForImm = + !genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true); + if (wasTempRegisterUsedForImm) + { + if (pTmpRegIsZero != nullptr) + { + *pTmpRegIsZero = false; + } + } + + if (reportUnwindData) + { + // spDelta is negative in the prolog, positive in the epilog, + // but we always tell the unwind codes the positive value.
+ ssize_t spDeltaAbs = abs(spDelta); + unsigned unwindSpDelta = (unsigned)spDeltaAbs; + assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in an unsigned + + compiler->unwindAllocStack(unwindSpDelta); + } +} + +void CodeGen::genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_sd; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fsd; + } + + if (spDelta != 0) + { + // generate addi SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + + assert((spDelta + spOffset + 16) <= 0); + + assert(spOffset <= 2031); // 2047-16 + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); +} + +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_sd; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fsd; + } + + if (spDelta != 0) + { + // generate addi SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); +} + +void CodeGen::genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_ld; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fld; + } + + if (spDelta != 0) + { + assert(!useSaveNextPair); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate addi SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_ld; + if (genIsValidFloatReg(reg1)) + { + ins = INS_fld; + } + + if (spDelta != 0) + { + // ld reg1, offset(SP) + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate add 
SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +// static +void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack) +{ + assert(regStack != nullptr); + assert(regStack->Height() == 0); + + unsigned regsCount = genCountBits(regsMask); + + while (regsMask != RBM_NONE) + { + regMaskTP reg1Mask = genFindLowestBit(regsMask); + regNumber reg1 = genRegNumFromMask(reg1Mask); + regsMask &= ~reg1Mask; + regsCount -= 1; + + bool isPairSave = false; + if (regsCount > 0) + { + regMaskTP reg2Mask = genFindLowestBit(regsMask); + regNumber reg2 = genRegNumFromMask(reg2Mask); + if (reg2 == REG_NEXT(reg1)) + { + // The JIT doesn't allow saving pair (S7,FP), even though the + // save_regp register pair unwind code specification allows it. + // The JIT always saves (FP,RA) as a pair, and uses the save_fpra + // unwind code. This only comes up in stress mode scenarios + // where callee-saved registers are not allocated completely + // from lowest-to-highest, without gaps. + if (reg1 != REG_FP) + { + // Both registers must have the same type to be saved as pair. + if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)) + { + isPairSave = true; + + regsMask &= ~reg2Mask; + regsCount -= 1; + + regStack->Push(RegPair(reg1, reg2)); + } + } + } + } + + if (!isPairSave) + { + regStack->Push(RegPair(reg1)); + } + } + assert(regsCount == 0 && regsMask == RBM_NONE); + + genSetUseSaveNextPairs(regStack); +} + +// static +void CodeGen::genSetUseSaveNextPairs(ArrayStack* regStack) +{ + for (int i = 1; i < regStack->Height(); ++i) + { + RegPair& curr = regStack->BottomRef(i); + RegPair prev = regStack->Bottom(i - 1); + + if (prev.reg2 == REG_NA || curr.reg2 == REG_NA) + { + continue; + } + + if (REG_NEXT(prev.reg2) != curr.reg1) + { + continue; + } + + if (genIsValidFloatReg(prev.reg2) != genIsValidFloatReg(curr.reg1)) + { + // It is possible to support changing of the last int pair with the first float pair, + // but it is very rare case and it would require superfluous changes in the unwinder. + continue; + } + curr.useSaveNextPair = true; + } +} + +int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) +{ + assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_RA)) == regsMask); // Do not expect anything else. + + static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); + return REGSIZE_BYTES; +} + +void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +{ + const int slotSize = genGetSlotSizeForRegsInMask(regsMask); + + ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); + genBuildRegPairsStack(regsMask, ®Stack); + + for (int i = 0; i < regStack.Height(); ++i) + { + RegPair regPair = regStack.Bottom(i); + if (regPair.reg2 != REG_NA) + { + // We can use two SD instructions. + genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_RA, // TODO REG_R21 => REG_RA + nullptr); + + spOffset += 2 * slotSize; + } + else + { + // No register pair; we use a SD instruction. + genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_RA, nullptr); // TODO REG_R21 => REG_RA + spOffset += slotSize; + } + + spDelta = 0; // We've now changed SP already, if necessary; don't do it again. 
+ } +} + +void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) +{ + assert(spDelta <= 0); + + unsigned regsToSaveCount = genCountBits(regsToSaveMask); + if (regsToSaveCount == 0) + { + if (spDelta != 0) + { + // Currently this is the case for varargs only + // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. + genStackPointerAdjustment(spDelta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK R21 => T6 + } + return; + } + + assert((spDelta % 16) == 0); + + assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED)); + + // Save integer registers at higher addresses than floating-point registers. + + regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; + + if (maskSaveRegsFloat != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); + spDelta = 0; + lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; + } + + if (maskSaveRegsInt != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); + // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. + } +} + +void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +{ + const int slotSize = genGetSlotSizeForRegsInMask(regsMask); + + ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); + genBuildRegPairsStack(regsMask, ®Stack); + + int stackDelta = 0; + for (int i = 0; i < regStack.Height(); ++i) + { + bool lastRestoreInTheGroup = (i == regStack.Height() - 1); + bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0); + if (updateStackDelta) + { + // Update stack delta only if it is the last restore (the first save). + assert(stackDelta == 0); + stackDelta = spDelta; + } + + RegPair regPair = regStack.Top(i); + if (regPair.reg2 != REG_NA) + { + spOffset -= 2 * slotSize; + + genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_RA, // TODO REG_R21 => REG_RA + nullptr); + } + else + { + spOffset -= slotSize; + genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_RA, nullptr); // TODO REG_R21 => REG_RA + } + } +} + +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +{ + assert(spDelta >= 0); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + if (regsToRestoreCount == 0) + { + if (spDelta != 0) + { + // Currently this is the case for varargs only + // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. + genStackPointerAdjustment(spDelta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK R21 => T6 + } + return; + } + + assert((spDelta % 16) == 0); + + // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + + // Point past the end, to start. We predecrement to find the offset to load from. + static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); + int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; + + // Save integer registers at higher addresses than floating-point registers. + + regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; + + // Restore in the opposite order of saving. 
+ + if (maskRestoreRegsInt != RBM_NONE) + { + int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; + } + + if (maskRestoreRegsFloat != RBM_NONE) + { + // If there is any spDelta, it must be used here. + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + // No need to update spOffset since it's not used after this. + } +} + +// clang-format on + +void CodeGen::genFuncletProlog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletProlog()\n"); +#endif + + assert(block != NULL); + assert(block->bbFlags & BBF_FUNCLET_BEG); + + ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); + + gcInfo.gcResetForBB(); + + compiler->unwindBegProlog(); + + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. + assert((maskSaveRegsInt & RBM_RA) != 0); + assert((maskSaveRegsInt & RBM_FP) != 0); + + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; + + regMaskTP maskArgRegsLiveIn; + if (isFilter) + { + maskArgRegsLiveIn = RBM_A0 | RBM_A1; + } + else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) + { + maskArgRegsLiveIn = RBM_NONE; + } + else + { + maskArgRegsLiveIn = RBM_A0; + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } +#endif + + int offset = 0; + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= -2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK REG_R21 => T6 + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK REG_R21 => T6 + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + offset = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + 
genStackPointerAdjustment(frameSize + SP_delta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK REG_R21 => T6 + } + else + { + unreached(); + } + + // This is the end of the OS-reported prolog for purposes of unwinding + compiler->unwindEndProlog(); + + // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet + // frame. + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (isFilter) + { + // This is the first block of a filter + // Note that register a1 = CallerSP of the containing function + // A1 is overwritten by the first Load (new callerSP) + // A2 is scratch when we have a large constant offset + + // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or + // function) + genInstrWithConstant(INS_ld, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, + REG_A2, false); + regSet.verifyRegUsed(REG_A1); + + // Store the PSP value (aka CallerSP) + genInstrWithConstant(INS_sd, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); + + // re-establish the frame pointer + genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_FPBASE, REG_A1, + genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); + } + else // This is a non-filter funclet + { + // A3 is scratch, A2 can also become scratch. + + // compute the CallerSP, given the frame pointer. a3 is scratch? + genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_A3, REG_FPBASE, + -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); + regSet.verifyRegUsed(REG_A3); + + genInstrWithConstant(INS_sd, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); + } + } +} + +void CodeGen::genFuncletEpilog() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFuncletEpilog()\n"); + } +#endif + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + bool unwindStarted = false; + int frameSize = genFuncletInfo.fiSpDelta1; + + if (!unwindStarted) + { + // We can delay this until we know we'll generate an unwindable instruction, if necessary. + compiler->unwindBegEpilog(); + unwindStarted = true; + } + + regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
+    assert((maskRestoreRegsInt & RBM_RA) != 0);
+    assert((maskRestoreRegsInt & RBM_FP) != 0);
+
+#ifdef DEBUG
+    if (compiler->opts.disAsm)
+    {
+        printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType);
+    }
+#endif
+
+    regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
+
+    assert(frameSize < 0);
+    if (genFuncletInfo.fiFrameType == 1)
+    {
+        // fiFrameType constraints:
+        assert(frameSize >= -2048);
+        assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040);
+
+        regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end
+
+        genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0);
+
+        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE,
+                                    genFuncletInfo.fiSP_to_FPRA_save_delta + 8);
+        compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8);
+
+        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta);
+        compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta);
+
+        // generate addi SP,SP,imm
+        genStackPointerAdjustment(-frameSize, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK REG_R21 => T6
+    }
+    else if (genFuncletInfo.fiFrameType == 2)
+    {
+        // fiFrameType constraints:
+        assert(frameSize < -2048);
+
+        int offset   = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta;
+        int SP_delta = roundUp((UINT)offset, STACK_ALIGN);
+        offset       = SP_delta - offset;
+
+        // first, generate addi SP,SP,imm
+        genStackPointerAdjustment(-frameSize - SP_delta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK REG_R21 => T6
+
+        int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8;
+        assert(offset2 < 2040); // can be adjusted if needed.
+
+        regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end
+        genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0);
+
+        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
+        compiler->unwindSaveReg(REG_RA, offset + 8);
+
+        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
+        compiler->unwindSaveReg(REG_FP, offset);
+
+        // second, generate addi SP,SP,imm for the remaining space.
+        genStackPointerAdjustment(SP_delta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK REG_R21 => T6
+    }
+    else
+    {
+        unreached();
+    }
+    GetEmitter()->emitIns_R_R_I(INS_jalr, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0);
+    compiler->unwindReturn(REG_RA);
+
+    compiler->unwindEndEpilog();
+}
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+    if (!compiler->ehAnyFunclets())
+    {
+        return;
+    }
+
+    assert(isFramePointerUsed());
+
+    // The frame size and offsets must be finalized
+    assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
+
+    genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
+
+    regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+    assert((rsMaskSaveRegs & RBM_RA) != 0);
+    assert((rsMaskSaveRegs & RBM_FP) != 0);
+
+    unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ?
8 : 0; + + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); + + unsigned saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; + + unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + + assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + + unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); + + int SP_to_FPRA_save_delta = compiler->lvaOutgoingArgSpaceSize; + + unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); + + unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; + assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); + + if (maxFuncletFrameSizeAligned <= (2048 - 8)) + { + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA + } + else + { + unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; + assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + + genFuncletInfo.fiFrameType = 2; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA + } + + int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + + /* Now save it for future use */ + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; + + genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + +#ifdef DEBUG + if (verbose) + { + printf("\n"); + printf("Funclet prolog / epilog info\n"); + printf(" Save regs: "); + dspRegMask(genFuncletInfo.fiSaveRegs); + printf("\n"); + printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + printf(" SP to FP/RA save location delta: %d\n", genFuncletInfo.fiSP_to_FPRA_save_delta); + printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); + printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (CallerSP_to_PSP_slot_delta != + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + { + printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + } + } + } + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta >= 0); +#endif // DEBUG +} + +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnEpilog()\n"); + } +#endif // DEBUG + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + { + printf("\n__epilog:\n"); + } + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", 
VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && (lastNode->gtOper == GT_JMP)) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. + noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + // TODO-LOONGARCH64-CQ: using B/BL for optimization. + case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. + callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. 
+ */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + genCallInstruction(jmpNode->AsCall()); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jalr, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + +void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (compiler->lvaPSPSym == BAD_VAR_NUM) + { + return; + } + + noway_assert(isFramePointerUsed()); // We need an explicit frame pointer + + int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); + + // We will just use the initReg since it is an available register + // and we are probably done using it anyway... + regNumber regTmp = initReg; + *pInitRegZeroed = false; + + genInstrWithConstant(INS_addi, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, REG_RA, false); // TODO R21 => RA + GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, regTmp, REG_NA, compiler->lvaPSPSym, 0); +} + +void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) +{ + regNumber rAddr; + regNumber rCnt = REG_NA; // Invalid + regMaskTP regMask; + + regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers + // see: src/jit/registerloongarch64.h + availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are + // currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for + // a large constant. + + rAddr = initReg; + *pInitRegZeroed = false; + + // rAddr is not a live incoming argument reg + assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); + assert(untrLclLo % 4 == 0); + + if (emitter::isValidSimm12(untrLclLo)) + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); + } + else + { + // Load immediate into the InitReg register + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); + *pInitRegZeroed = false; + } + + bool useLoop = false; + unsigned uCntBytes = untrLclHi - untrLclLo; + assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. + unsigned int padding = untrLclLo & 0x7; + + if (padding) + { + assert(padding == 4); + GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, 0); + uCntBytes -= 4; + } + + unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. + + // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. + // When it is 10 or greater, we will emit a loop containing a sd instruction. + // In both of these cases the sd instruction will write two zeros to memory + // and we will use a single str instruction at the end whenever we have an odd count. 
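+    // Worked example (illustrative): zeroing 36 bytes starting at a 4-byte-aligned slot
+    // (padding == 4, uCntSlots == 4) takes the inline path below and emits roughly:
+    //     sw   zero, 0(rAddr)            // 4-byte prefix to reach 8-byte alignment
+    //     sd   zero, 12(rAddr)
+    //     sd   zero, 4(rAddr)
+    //     addi rAddr, rAddr, 20
+    //     sd   zero, 8(rAddr)
+    //     sd   zero, 0(rAddr)
+    //     addi rAddr, rAddr, 16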
+ if (uCntSlots >= 10) + useLoop = true; + + if (useLoop) + { + // We pick the next lowest register number for rCnt + noway_assert(availMask != RBM_NONE); + regMask = genFindLowestBit(availMask); + rCnt = genRegNumFromMask(regMask); + availMask &= ~regMask; + + noway_assert(uCntSlots >= 2); + assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming + // argument reg + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + + // TODO-LOONGARCH64: maybe optimize further + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); + + // bne rCnt, zero, -4 * 4 + ssize_t imm = -16; + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + + uCntBytes %= REGSIZE_BYTES * 2; + } + else + { + while (uCntBytes >= REGSIZE_BYTES * 2) + { + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); + uCntBytes -= REGSIZE_BYTES * 2; + padding = 0; + } + } + + if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + { + if ((uCntBytes - REGSIZE_BYTES) == 0) + { + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); + } + uCntBytes -= REGSIZE_BYTES; + } + if (uCntBytes > 0) + { + assert(uCntBytes == sizeof(int)); + GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, padding); + uCntBytes -= sizeof(int); + } + noway_assert(uCntBytes == 0); +} + +void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) +{ +#if !FEATURE_FIXED_OUT_ARGS + assert((tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel) || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS + + GetEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock); +} + +BasicBlock* CodeGen::genCallFinally(BasicBlock* block) +{ + // Generate a call to the finally, like this: + // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used + // jal finally-funclet + // j finally-return // Only for non-retless finally calls + // The 'b' can be a NOP if we're going to the next block. + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + GetEmitter()->emitIns_R_S(INS_ld, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); + } + GetEmitter()->emitIns_J(INS_jal, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. 
+ + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_ebreak); // This should never get executed + } + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + GetEmitter()->emitDisableGC(); + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-LOONGARCH64-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + + GetEmitter()->emitEnableGC(); + } + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + block = block->bbNext; + } + return block; +} + +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); +} + +// move an immediate value into an integer register +void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, + regNumber reg, + ssize_t imm, + insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + emitter* emit = GetEmitter(); + + if (!compiler->opts.compReloc) + { + size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs. 
+ } + + if (EA_IS_RELOC(size)) + { + assert(genIsValidIntReg(reg)); + emit->emitIns_R_AI(INS_jalr, size, reg, imm); // for example: EA_PTR_DSP_RELOC + } + else + { + emit->emitIns_I_la(size, reg, imm); + } + + regSet.verifyRegUsed(reg); +} + +void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) +{ + switch (tree->gtOper) + { + case GT_CNS_INT: + { + // relocatable values tend to come down as a CNS_INT of native int type + // so the line between these two opcodes is kind of blurry + GenTreeIntCon* con = tree->AsIntCon(); + ssize_t cnsVal = con->IconValue(); + + emitAttr attr = emitActualTypeSize(targetType); + // TODO-CQ: Currently we cannot do this for all handles because of + // https://github.com/dotnet/runtime/issues/60712 + if (con->ImmedValNeedsReloc(compiler)) + { + attr = EA_SET_FLG(attr, EA_CNS_RELOC_FLG); + } + + if (targetType == TYP_BYREF) + { + attr = EA_SET_FLG(attr, EA_BYREF_FLG); + } + + instGen_Set_Reg_To_Imm(attr, targetReg, cnsVal, + INS_FLAGS_DONT_CARE DEBUGARG(con->gtTargetHandle) DEBUGARG(con->gtFlags)); + regSet.verifyRegUsed(targetReg); + } + break; + + case GT_CNS_DBL: + { + emitter* emit = GetEmitter(); + emitAttr size = emitActualTypeSize(tree); + double constValue = tree->AsDblCon()->DconValue(); + + // Make sure we use "daddiu reg, zero, 0x00" only for positive zero (0.0) + // and not for negative zero (-0.0) + if (*(__int64*)&constValue == 0) + { + // A faster/smaller way to generate 0.0 + // We will just zero out the entire vector register for both float and double + emit->emitIns_R_R(INS_fmv_d_x, EA_8BYTE, targetReg, REG_R0); + } + else + { + // Get a temp integer register to compute long address. + // regNumber addrReg = tree->GetSingleTempReg(); + + // We must load the FP constant from the constant pool + // Emit a data section constant for the float or double constant. + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); + + // Load the FP constant. + assert(targetReg >= REG_F0); + + instruction ins = size == EA_4BYTE ? INS_flw : INS_fld; + + // Compute the address of the FP constant and load the data. + emit->emitIns_R_C(ins, size, targetReg, REG_NA, hnd, 0); + } + } + break; + + default: + unreached(); + } +} + +// Produce code for a GT_INC_SATURATE node. +void CodeGen::genCodeForIncSaturate(GenTree* tree) +{ + NYI("unimplemented on RISCV64 yet"); +} + +// Generate code to get the high N bits of a N*N=2N bit multiplication result +void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + + genConsumeOperands(treeNode); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + assert(!varTypeIsFloating(targetType)); + + // op1 and op2 can only be a reg at present, will amend in the future. + assert(!op1->isContained()); + assert(!op2->isContained()); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + if (EA_SIZE(attr) == EA_8BYTE) + { + instruction ins = isUnsigned ? 
INS_mulhu : INS_mulh; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + else + { + assert(EA_SIZE(attr) == EA_4BYTE); + if (isUnsigned) + { + emit->emitIns_R_R_I(INS_slli, EA_8BYTE, REG_RA, op1->GetRegNum(), 32); + emit->emitIns_R_R_I(INS_slli, EA_8BYTE, targetReg, op2->GetRegNum(), 32); + emit->emitIns_R_R_R(INS_mulhu, EA_8BYTE, targetReg, REG_RA, targetReg); + emit->emitIns_R_R_I(INS_srai, attr, targetReg, targetReg, 32); + } + else + { + emit->emitIns_R_R_R(INS_mul, EA_8BYTE, targetReg, op1->GetRegNum(), op2->GetRegNum()); + emit->emitIns_R_R_I(INS_srai, attr, targetReg, targetReg, 32); + } + + } + + genProduceReg(treeNode); +} + +// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR and XOR +// This method is expected to have called genConsumeOperands() before calling it. +void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +{ + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); + + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + instruction ins = genGetInsForOper(treeNode); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + assert(r == targetReg); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!isRegCandidate && !tree->IsMultiReg() && !(tree->gtFlags & GTF_SPILLED)) + { + var_types targetType = varDsc->GetRegisterType(tree); + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + GetEmitter()->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. 
Vector3) field + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + // record the offset + unsigned offset = tree->GetLclOffs(); + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. + assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_R0; + } + else if (data->isContained()) + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } + else + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + assert(dataReg != REG_NA); + + instruction ins = ins_StoreFromSrc(dataReg, targetType); + + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, REG_NA, varNum, offset); + + genUpdateLife(tree); + + varDsc->SetRegNum(REG_STK); +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// lclNode - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) +{ + GenTree* data = lclNode->gtOp1; + + // var = call, where call returns a multi-reg return value + // case is handled separately. + if (data->gtSkipReloadOrCopy()->IsMultiRegNode()) + { + genMultiRegStoreToLocal(lclNode); + return; + } + + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); + if (lclNode->IsMultiReg()) + { + NYI_LOONGARCH64("genCodeForStoreLclVar : unimplemented on RISCV64 yet"); + regNumber operandReg = genConsumeReg(data); + unsigned int regCount = varDsc->lvFieldCnt; + for (unsigned i = 0; i < regCount; ++i) + { + regNumber varReg = lclNode->GetRegByIndex(i); + assert(varReg != REG_NA); + unsigned fieldLclNum = varDsc->lvFieldLclStart + i; + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum); + assert(fieldVarDsc->TypeGet() == TYP_FLOAT); + GetEmitter()->emitIns_R_R_I(INS_sd, emitTypeSize(TYP_FLOAT), varReg, operandReg, i); + } + genProduceReg(lclNode); + } + else + { + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); + unsigned varNum = lclNode->GetLclNum(); + var_types targetType = varDsc->GetRegisterType(lclNode); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (lclNode->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(lclNode); + return; + } +#endif // FEATURE_SIMD + + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContained()) + { + // This is only possible for a zero-init or bitcast. + const bool zeroInit = data->IsIntegralConst(0); + + // TODO-RISCV64-CQ: supporting the SIMD. 
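+            // (Informal note: the contained cases handled below are a zero constant, an
+            // integral constant materialized into a temporary register, and a bitcast whose
+            // source already lives in a register.)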
+ assert(!varTypeIsSIMD(targetType)); + + if (zeroInit) + { + dataReg = REG_R0; + } + else if (data->IsIntegralConst()) + { + ssize_t imm = data->AsIntConCommon()->IconValue(); + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + dataReg = REG_RA; // TODO CHECK SIDE EFFECT WHEN REG_R21 => REG_RA + } + else + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } + } + else + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + inst_set_SV_var(lclNode); + + instruction ins = ins_StoreFromSrc(dataReg, targetType); + emitAttr attr = emitActualTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, REG_NA, varNum, /* offset */ 0); + + genUpdateLife(lclNode); + + varDsc->SetRegNum(REG_STK); + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) + { + // Assign into targetReg when dataReg (from op1) is not the same register + inst_Mov(targetType, targetReg, dataReg, true, emitActualTypeSize(targetType)); + } + genProduceReg(lclNode); + } + } +} + +void CodeGen::genSimpleReturn(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + GenTree* op1 = treeNode->gtGetOp1(); + var_types targetType = treeNode->TypeGet(); + + assert(targetType != TYP_STRUCT); + assert(targetType != TYP_VOID); + + regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? REG_FLOATRET : REG_INTRET; + + bool movRequired = (op1->GetRegNum() != retReg); + + if (!movRequired) + { + if (op1->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); + bool isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate(); + if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0)) + { + // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR + + unsigned lclNum = lcl->GetLclNum(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + var_types op1Type = genActualType(op1->TypeGet()); + var_types lclType = genActualType(varDsc->TypeGet()); + + if (genTypeSize(op1Type) < genTypeSize(lclType)) + { + movRequired = true; + } + } + } + } + if (movRequired) + { + emitAttr attr = emitActualTypeSize(targetType); + if (varTypeUsesFloatArgReg(treeNode)) + { + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_fsgnj_s, attr, retReg, op1->GetRegNum(), op1->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, attr, retReg, op1->GetRegNum(), op1->GetRegNum()); + } + } + else + { + if (attr == EA_4BYTE) + { + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + { + GetEmitter()->emitIns_R_R_I(INS_slli, EA_PTRSIZE, retReg, op1->GetRegNum(), 32); + GetEmitter()->emitIns_R_R_I(INS_srli, EA_PTRSIZE, retReg, retReg, 32); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_addiw, attr, retReg, op1->GetRegNum(), 0); + } + } + else + GetEmitter()->emitIns_R_R_I(INS_addi, attr, retReg, op1->GetRegNum(), 0); + } + } +} + +/*********************************************************************************************** + * Generate code for localloc + */ +void CodeGen::genLclHeap(GenTree* tree) +{ + assert(tree->OperGet() == GT_LCLHEAP); + assert(compiler->compLocallocUsed); + + emitter* emit = GetEmitter(); + GenTree* size = tree->AsOp()->gtOp1; + noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + 
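+    // Overview (informal): the code below pops the outgoing-arg area off SP if one exists,
+    // allocates the requested STACK_ALIGN'ed amount (zeroing and/or probing pages as needed),
+    // then re-establishes the outgoing-arg area and returns the allocated address in the
+    // target register.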
+ regNumber targetReg = tree->GetRegNum(); + regNumber regCnt = REG_NA; + regNumber pspSymReg = REG_NA; + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + BasicBlock* endLabel = nullptr; // can optimize for loongarch. + unsigned stackAdjustment = 0; + const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1; + target_ssize_t lastTouchDelta = + ILLEGAL_LAST_TOUCH_DELTA; // The number of bytes from SP to the last stack address probed. + + noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes + noway_assert(genStackLevel == 0); // Can't have anything on the stack + + // compute the amount of memory to allocate to properly STACK_ALIGN. + size_t amount = 0; + if (size->IsCnsIntOrI()) + { + // If size is a constant, then it must be contained. + assert(size->isContained()); + + // If amount is zero then return null in targetReg + amount = size->AsIntCon()->gtIconVal; + if (amount == 0) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); + goto BAILOUT; + } + + // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN + amount = AlignUp(amount, STACK_ALIGN); + } + else + { + // If 0 bail out by returning null in targetReg + genConsumeRegAndCopy(size, targetReg); + endLabel = genCreateTempLabel(); + emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); + + // Compute the size of the block to allocate and perform alignment. + // If compInitMem=true, we can reuse targetReg as regcnt, + // since we don't need any internal registers. + if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + if (regCnt != targetReg) + { + emit->emitIns_R_R_I(INS_ori, easz, regCnt, targetReg, 0); + } + } + + // Align to STACK_ALIGN + // regCnt will be the total number of bytes to localloc + inst_RV_IV(INS_addi, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); + + assert(regCnt != REG_RA); // TODO CHECK REG_R21 => RA + ssize_t imm2 = ~(STACK_ALIGN - 1); + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_RA, REG_R0, imm2); // TODO CHECK REG_R21 => RA + emit->emitIns_R_R_R(INS_and, emitActualTypeSize(type), regCnt, regCnt, REG_RA); // TODO CHECK REG_R21 => RA + } + + // If we have an outgoing arg area then we must adjust the SP by popping off the + // outgoing arg area. We will restore it right before we return from this method. + // + // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following + // are the cases that need to be handled: + // i) Method has out-going arg area. + // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs). + // Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc + // space. + // ii) Method has no out-going arg area. + // Nothing to pop off from the stack. + if (compiler->lvaOutgoingArgSpaceSize > 0) + { + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain + // // aligned + genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, rsGetRsvdReg()); + stackAdjustment += outgoingArgSpaceAligned; + } + + if (size->IsCnsIntOrI()) + { + // We should reach here only for non-zero, constant size allocations. 
+ assert(amount > 0); + ssize_t imm = -16; + + // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes. + static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); + assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time + size_t stpCount = amount / (REGSIZE_BYTES * 2); + if (compiler->info.compInitMem) + { + if (stpCount <= 4) + { + imm = -16 * stpCount; + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + + imm = -imm; + while (stpCount != 0) + { + imm -= 8; + emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + imm -= 8; + emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + stpCount -= 1; + } + + lastTouchDelta = 0; + + goto ALLOC_DONE; + } + } + else if (amount < compiler->eeGetPageSize()) // must be < not <= + { + // Since the size is less than a page, simply adjust the SP value. + // The SP might already be in the guard page, so we must touch it BEFORE + // the alloc, not after. + + // ld_w r0, 0(SP) + emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0); + + lastTouchDelta = amount; + imm = -(ssize_t)amount; + if (emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, rsGetRsvdReg(), amount); + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rsGetRsvdReg()); + } + + goto ALLOC_DONE; + } + + // else, "mov regCnt, amount" + // If compInitMem=true, we can reuse targetReg as regcnt. + // Since size is a constant, regCnt is not yet initialized. + assert(regCnt == REG_NA); + if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + } + instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); + } + + if (compiler->info.compInitMem) + { + // At this point 'regCnt' is set to the total number of bytes to locAlloc. + // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid + // by tickling the pages, we will just push 0's on the stack. + // + // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 + // and localloc size is a multiple of STACK_ALIGN. + + // Loop: + ssize_t imm = -16; + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + + emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); + emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0); + + // If not done, loop + // Note that regCnt is the number of bytes to stack allocate. + // Therefore we need to subtract 16 from regcnt here. + assert(genIsValidIntReg(regCnt)); + + emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16); + + assert(imm == (-4 << 2)); // goto loop. + emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); + + lastTouchDelta = 0; + } + else + { + // At this point 'regCnt' is set to the total number of bytes to localloc. + // + // We don't need to zero out the allocated memory. However, we do have + // to tickle the pages to ensure that SP is always valid and is + // in sync with the "stack guard page". Note that in the worst + // case SP is on the last byte of the guard page. Thus you must + // touch SP-0 first not SP-0x1000. + // + // This is similar to the prolog code in CodeGen::genAllocLclFrame(). 
+ // + // Note that we go through a few hoops so that SP never points to + // illegal pages at any time during the tickling process. + // + // sltu RA, SP, regCnt + // sub regCnt, SP, regCnt // regCnt now holds ultimate SP + // beq RA, REG_R0, Skip + // addi regCnt, REG_R0, 0 + // + // Skip: + // sub regCnt, SP, regCnt + // + // lui regTmp, eeGetPageSize()>>12 + // Loop: + // lw r0, 0(SP) // tickle the page - read from the page + // sub RA, SP, regTmp // decrement SP by eeGetPageSize() + // bltu RA, regCnt, Done + // sub SP, SP,regTmp + // j Loop + // + // Done: + // mov SP, regCnt + // + + // Setup the regTmp + regNumber regTmp = tree->GetSingleTempReg(); + + assert(regCnt != REG_RA); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, REG_RA, REG_SPBASE, regCnt); // TODO CHECK REG_R21 => RA + + //// subu regCnt, SP, regCnt // regCnt now holds ultimate SP + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); + + // Overflow, set regCnt to lowest possible value + emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, REG_RA, REG_R0, 2 << 2); + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0); + + assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12)); + emit->emitIns_R_I(INS_lui, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12); + + // genDefineTempLabel(loop); + + // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page + emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0); + + // decrement SP by eeGetPageSize() + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_RA, REG_SPBASE, regTmp); // TODO CHECK REG_R21 => RA + + assert(regTmp != REG_RA); // TODO CHECK REG_R21 => RA + + ssize_t imm = 3 << 2; // goto done. + emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_RA, regCnt, imm); // TODO CHECK REG_R21 => RA + + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp); + + imm = -4 << 2; + // Jump to loop and tickle new stack address + emit->emitIns_I(INS_j, EA_PTRSIZE, imm); + + // Done with stack tickle loop + // genDefineTempLabel(done); + + // Now just move the final value to SP + emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); + + // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, + // we're going to assume the worst and probe. + } + +ALLOC_DONE: + // Re-adjust SP to allocate outgoing arg area. We must probe this adjustment. + if (stackAdjustment != 0) + { + assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned + assert((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || (lastTouchDelta >= 0)); + + const regNumber tmpReg = rsGetRsvdReg(); + + if ((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || + (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > + compiler->eeGetPageSize())) + { + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, tmpReg); + } + else + { + genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, tmpReg); + } + + // Return the stackalloc'ed address in result register. + // TargetReg = SP + stackAdjustment. 
+        //
+        genInstrWithConstant(INS_addi, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tmpReg);
+    }
+    else // stackAdjustment == 0
+    {
+        // Move the final value of SP to targetReg
+        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0);
+    }
+
+BAILOUT:
+    if (endLabel != nullptr)
+        genDefineTempLabel(endLabel);
+
+    genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
+//
+// Arguments:
+//    tree - the node
+//
+void CodeGen::genCodeForNegNot(GenTree* tree)
+{
+    assert(tree->OperIs(GT_NEG, GT_NOT));
+
+    var_types targetType = tree->TypeGet();
+
+    assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));
+
+    regNumber targetReg = tree->GetRegNum();
+
+    // The arithmetic node must be sitting in a register (since it's not contained)
+    assert(!tree->isContained());
+    // The dst can only be a register.
+    assert(targetReg != REG_NA);
+
+    GenTree* operand = tree->gtGetOp1();
+    assert(!operand->isContained());
+    // The src must be a register.
+    regNumber operandReg = genConsumeReg(operand);
+
+    emitAttr attr = emitActualTypeSize(tree);
+    if (tree->OperIs(GT_NEG))
+    {
+        // neg: targetReg = 0 - operandReg
+        GetEmitter()->emitIns_R_R_R(INS_sub, attr, targetReg, REG_R0, operandReg);
+    }
+    else if (tree->OperIs(GT_NOT))
+    {
+        // not: targetReg = operandReg ^ -1
+        GetEmitter()->emitIns_R_R_I(INS_xori, attr, targetReg, operandReg, -1);
+    }
+
+    genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node.
+//
+// Arguments:
+//    tree - the node
+//
+void CodeGen::genCodeForBswap(GenTree* tree)
+{
+    NYI_RISCV64("genCodeForBswap is not implemented on RISCV64 yet");
+}
+
+//------------------------------------------------------------------------
+// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node.
+// (1) float/double MOD is morphed into a helper call by front-end.
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForDivMod(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); + + var_types targetType = tree->TypeGet(); + emitter* emit = GetEmitter(); + + genConsumeOperands(tree); + + if (varTypeIsFloating(targetType)) + { + // Floating point divide never raises an exception + assert(varTypeIsFloating(tree->gtOp1)); + assert(varTypeIsFloating(tree->gtOp2)); + assert(tree->gtOper == GT_DIV); + // genCodeForBinary(tree); + instruction ins = genGetInsForOper(tree); + emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), + tree->gtOp2->GetRegNum()); + } + else // an integer divide operation + { + GenTree* divisorOp = tree->gtGetOp2(); + // divisorOp can be immed or reg + assert(!divisorOp->isContained() || divisorOp->isContainedIntOrIImmed()); + + if (divisorOp->IsIntegralConst(0) || divisorOp->GetRegNum() == REG_R0) + { + // We unconditionally throw a divide by zero exception + genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); + } + else // the divisor is not the constant zero + { + GenTree* src1 = tree->gtOp1; + unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet())); + emitAttr size = EA_ATTR(TypeSize); + + assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) && + TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); + + // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + regNumber Reg1 = src1->GetRegNum(); + regNumber divisorReg = divisorOp->GetRegNum(); + instruction ins; + + // Check divisorOp first as we can always allow it to be a contained immediate + if (divisorOp->isContainedIntOrIImmed()) + { + ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); + divisorReg = REG_RA; // TODO REG_R21 => REG_RA + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, intConst); // TODO REG_R21 => REG_RA + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (tree->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!divisorOp->isContainedIntOrIImmed()); + ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); + Reg1 = REG_RA; // TODO REG_R21 => REG_RA + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, intConst); // TODO REG_R21 => REG_RA + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + + // Generate the require runtime checks for GT_DIV or GT_UDIV + if (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD) + { + // Two possible exceptions: + // (AnyVal / 0) => DivideByZeroException + // (MinInt / -1) => ArithmeticException + // + bool checkDividend = true; + + // Do we have an immediate for the 'divisorOp'? 
+ // + if (divisorOp->IsCnsIntOrI()) + { + ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + // assert(intConstValue != 0); // already checked above by IsIntegralConst(0) + if (intConstValue != -1) + { + checkDividend = false; // We statically know that the dividend is not -1 + } + } + else // insert check for division by zero + { + // Check if the divisor is zero throw a DivideByZeroException + genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg); + } + + if (checkDividend) + { + // Check if the divisor is not -1 branch to 'sdivLabel' + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_RA, REG_R0, -1); // TODO REG_R21 => REG_RA + BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for riscv64. + emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_RA, divisorReg); // TODO REG_R21 => REG_RA + + // If control flow continues past here the 'divisorReg' is known to be -1 + regNumber dividendReg = tree->gtGetOp1()->GetRegNum(); + // At this point the divisor is known to be -1 + // + // Whether dividendReg is MinInt or not + // + + emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0); + + emit->emitIns_R_R_R(size == EA_4BYTE ? INS_addw : INS_add, size, REG_RA, dividendReg, // TODO REG_R21 => REG_RA + dividendReg); + genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_RA); // TODO REG_R21 => REG_RA + genDefineTempLabel(sdivLabel); + } + + // Generate the sdiv instruction + if (size == EA_4BYTE) + { + if (tree->OperGet() == GT_DIV) + { + ins = INS_divw; + } + else + { + ins = INS_remw; + } + } + else + { + if (tree->OperGet() == GT_DIV) + { + ins = INS_div; + } + else + { + ins = INS_rem; + } + } + + emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); + } + else // if (tree->gtOper == GT_UDIV) GT_UMOD + { + // Only one possible exception + // (AnyVal / 0) => DivideByZeroException + // + // Note that division by the constant 0 was already checked for above by the + // op2->IsIntegralConst(0) check + // + + if (!divisorOp->IsCnsIntOrI()) + { + // divisorOp is not a constant, so it could be zero + // + genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg); + } + + if (size == EA_4BYTE) + { + if (tree->OperGet() == GT_UDIV) + { + ins = INS_divuw; + } + else + { + ins = INS_remuw; + } + + // TODO-RISCV64: here is just for signed-extension ? + emit->emitIns_R_R_I(INS_slliw, EA_4BYTE, Reg1, Reg1, 0); + emit->emitIns_R_R_I(INS_slliw, EA_4BYTE, divisorReg, divisorReg, 0); + } + else + { + if (tree->OperGet() == GT_UDIV) + { + ins = INS_divu; + } + else + { + ins = INS_remu; + } + } + + emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); + } + } + } + genProduceReg(tree); +} + +// Generate code for InitBlk by performing a loop unroll +// Preconditions: +// a) Both the size and fill byte value are integer constants. +// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. 
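+// Informal example: with a zero fill value and a 24-byte layout stored through a register
+// destination, the unrolled sequence below is roughly:
+//     sd zero, 0(dstReg)
+//     sd zero, 8(dstReg)
+//     sd zero, 16(dstReg)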
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) +{ + assert(node->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); + } + + regNumber srcReg; + GenTree* src = node->Data(); + + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->gtGetOp1(); + } + + if (!src->isContained()) + { + srcReg = genConsumeReg(src); + } + else + { + assert(src->IsIntegralConst(0)); + srcReg = REG_R0; + } + + if (node->IsVolatile()) + { + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + unsigned size = node->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(dstOffset < INT32_MAX - static_cast(size)); + + for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize) + { + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_sd, EA_8BYTE, srcReg, REG_NA, dstLclNum, dstOffset); + emit->emitIns_S_R(INS_sd, EA_8BYTE, srcReg, REG_NA, dstLclNum, dstOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_sd, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset); + emit->emitIns_R_R_I(INS_sd, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset + 8); + } + } + + for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + instruction storeIns; + emitAttr attr; + + switch (regSize) + { + case 1: + storeIns = INS_sb; + attr = EA_4BYTE; + break; + case 2: + storeIns = INS_sh; + attr = EA_4BYTE; + break; + case 4: + storeIns = INS_sw; + attr = EA_ATTR(regSize); + break; + case 8: + storeIns = INS_sd; + attr = EA_ATTR(regSize); + break; + default: + unreached(); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, srcReg, REG_NA, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset); + } + } +} + +void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) +{ + GenTree* dstAddr = cpObjNode->Addr(); + GenTree* source = cpObjNode->Data(); + var_types srcAddrType = TYP_BYREF; + bool sourceIsLocal = false; + + assert(source->isContained()); + if (source->gtOper == GT_IND) + { + GenTree* srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + srcAddrType = srcAddr->TypeGet(); + } + else + { + noway_assert(source->IsLocal()); + sourceIsLocal = true; + } + + bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr(); + +#ifdef DEBUG + assert(!dstAddr->isContained()); + + // This GenTree node has data about GC pointers, this means we're dealing + // with CpObj. + assert(cpObjNode->GetLayout()->HasGCPtr()); +#endif // DEBUG + + // Consume the operands and get them into the right registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). 
+ genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); + + ClassLayout* layout = cpObjNode->GetLayout(); + unsigned slots = layout->GetSlotCount(); + + // Temp register(s) used to perform the sequence of loads and stores. + regNumber tmpReg = cpObjNode->ExtractTempReg(); + regNumber tmpReg2 = REG_NA; + + assert(genIsValidIntReg(tmpReg)); + assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); + assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); + + if (slots > 1) + { + tmpReg2 = cpObjNode->GetSingleTempReg(); + assert(tmpReg2 != tmpReg); + assert(genIsValidIntReg(tmpReg2)); + assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); + assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile CpObj operation + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + + emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType); + emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet()); + + // If we can prove it's on the stack we don't need to use the write barrier. + if (dstOnStack) + { + unsigned i = 0; + // Check if two or more remaining slots and use two ld/sd sequence + while (i < slots - 1) + { + emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); + emitAttr attr1 = emitTypeSize(layout->GetGCPtrType(i + 1)); + if ((i + 2) == slots) + { + attrSrcAddr = EA_8BYTE; + attrDstAddr = EA_8BYTE; + } + + emit->emitIns_R_R_I(INS_ld, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_ld, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_sd, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_sd, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE); + i += 2; + } + + // Use a ld/sd sequence for the last remainder + if (i < slots) + { + emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); + if (i + 1 >= slots) + { + attrSrcAddr = EA_8BYTE; + attrDstAddr = EA_8BYTE; + } + + emit->emitIns_R_R_I(INS_ld, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_addi, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_sd, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_addi, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + TARGET_POINTER_SIZE); + } + } + else + { + unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount(); + + unsigned i = 0; + while (i < slots) + { + if (!layout->IsGCPtr(i)) + { + // Check if the next slot's type is also TYP_GC_NONE and use two ld/sd + if ((i + 1 < slots) && !layout->IsGCPtr(i + 1)) + { + if ((i + 2) == slots) + { + attrSrcAddr = EA_8BYTE; + attrDstAddr = EA_8BYTE; + } + emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + 
emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + ++i; // extra increment of i, since we are copying two items + } + else + { + if (i + 1 >= slots) + { + attrSrcAddr = EA_8BYTE; + attrDstAddr = EA_8BYTE; + } + emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); + emit->emitIns_R_R_I(INS_addi, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); + emit->emitIns_R_R_I(INS_addi, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + } + } + else + { + // In the case of a GC-Pointer we'll call the ByRef write barrier helper + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + gcPtrCount--; + } + ++i; + } + assert(gcPtrCount == 0); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a INS_BARRIER_RMB after a volatile CpObj operation + // TODO-LOONGARCH64: there is only BARRIER_FULL for LOONGARCH64. + instGen_MemoryBarrier(BARRIER_FULL); + } + + // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. + // While we normally update GC info prior to the last instruction that uses them, + // these actually live into the helper call. + gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); +} + +// generate code do a switch statement based on a table of ip-relative offsets +void CodeGen::genTableBasedSwitch(GenTree* treeNode) +{ + genConsumeOperands(treeNode->AsOp()); + regNumber idxReg = treeNode->AsOp()->gtOp1->GetRegNum(); + regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum(); + + regNumber tmpReg = treeNode->GetSingleTempReg(); + + // load the ip-relative offset (which is relative to start of fgFirstBB) + GetEmitter()->emitIns_R_R_I(INS_slli, EA_8BYTE, REG_RA, idxReg, 2); // TODO CHECK REG_R21 => RA + GetEmitter()->emitIns_R_R_R(INS_add, EA_8BYTE, baseReg, baseReg, REG_RA); // TODO CHECK REG_R21 => RA + GetEmitter()->emitIns_R_R_I(INS_lw, EA_4BYTE, baseReg, baseReg, 0); + + // add it to the absolute address of fgFirstBB + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg); + + // jr baseReg + GetEmitter()->emitIns_R_R_I(INS_jalr, emitActualTypeSize(TYP_I_IMPL), REG_R0, baseReg, 0); +} + +// emits the table and an instruction to get the address of the first element +void CodeGen::genJumpTable(GenTree* treeNode) +{ + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); + assert(treeNode->OperGet() == GT_JMPTABLE); + + unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; + BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; + unsigned jmpTabOffs; + unsigned jmpTabBase; + + jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true); + + jmpTabOffs = 0; + + JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase); + + for (unsigned i = 0; i < jumpCount; i++) + { + BasicBlock* target = *jumpTable++; + noway_assert(target->bbFlags & BBF_HAS_LABEL); + + JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum); + + GetEmitter()->emitDataGenData(i, target); + }; + + 
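+    // Illustrative annotation: each table entry written above is a 4-byte offset relative to
+    // fgFirstBB. genTableBasedSwitch above consumes the table with roughly this shape:
+    //     slli  ra, idxReg, 2         // scale the switch index by the 4-byte entry size
+    //     add   baseReg, baseReg, ra
+    //     lw    baseReg, 0(baseReg)   // load the ip-relative offset
+    //     lea   tmpReg, fgFirstBB     // INS_lea pseudo-op resolved by the emitter
+    //     add   baseReg, baseReg, tmpReg
+    //     jalr  zero, 0(baseReg)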
GetEmitter()->emitDataGenEnd(); + + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + GetEmitter()->emitIns_R_C(INS_jal, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA, + compiler->eeFindJitDataOffs(jmpTabBase), 0); + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. +// +// Arguments: +// treeNode - the GT_XADD/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. +// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +static inline bool isImmed(GenTree* treeNode) +{ + assert(treeNode->OperIsBinary()); + + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) + { + return true; + } + + return false; +} + +instruction CodeGen::genGetInsForOper(GenTree* treeNode) +{ + var_types type = treeNode->TypeGet(); + genTreeOps oper = treeNode->OperGet(); + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2; + emitAttr attr = emitActualTypeSize(treeNode); + bool isImm = false; + + instruction ins = INS_ebreak; + + if (varTypeIsFloating(type)) + { + switch (oper) + { + case GT_ADD: + if (attr == EA_4BYTE) + { + ins = INS_fadd_s; + } + else + { + ins = INS_fadd_d; + } + break; + case GT_SUB: + if (attr == EA_4BYTE) + { + ins = INS_fsub_s; + } + else + { + ins = INS_fsub_d; + } + break; + case GT_MUL: + if (attr == EA_4BYTE) + { + ins = INS_fmul_s; + } + else + { + ins = INS_fmul_d; + } + break; + case GT_DIV: + if (attr == EA_4BYTE) + { + ins = INS_fdiv_s; + } + else + { + ins = INS_fdiv_d; + } + break; + case GT_NEG: + _ASSERTE(!"TODO RISCV64 NYI"); + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - float"); + unreached(); + break; + } + } + else + { + switch (oper) + { + case GT_ADD: + isImm = isImmed(treeNode); + if (isImm) + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_addi; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_addi; + } + } + else + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_add; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_addw; + } + } + break; + + case GT_SUB: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_sub; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_subw; + } + break; + + case GT_MOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_rem; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_remw; + } + break; + + case GT_DIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_divw; + } + break; + + case GT_UMOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_remu; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_remuw; + } + break; + + case GT_UDIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_divu; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_divuw; + } + break; + + case GT_MUL: + // TODO CHECK AGAIN + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + op2 = treeNode->gtGetOp2(); + if (genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) 
+ ins = INS_mulw; + else + ins = INS_mul; + } + else + { + ins = INS_mulw; + } + break; + + case GT_NEG: + _ASSERTE(!"TODO RISCV64 NYI"); + break; + + case GT_NOT: + _ASSERTE(!"TODO RISCV64 NYI"); + break; + + case GT_AND: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_andi; + } + else + { + ins = INS_and; + } + break; + + case GT_AND_NOT: + _ASSERTE(!"TODO RISCV64 NYI"); + break; + + case GT_OR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_ori; + } + else + { + ins = INS_or; + } + break; + + case GT_LSH: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_slliw; + } + else + { + ins = INS_slli; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_sllw; + } + else + { + ins = INS_sll; + } + } + break; + + case GT_RSZ: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_srliw; + } + else + { + ins = INS_srli; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_srlw; + } + else + { + ins = INS_srl; + } + } + break; + + case GT_RSH: + isImm = isImmed(treeNode); + if (isImm) + { + // it's better to check sa. + if (attr == EA_4BYTE) + { + ins = INS_sraiw; + } + else + { + ins = INS_srai; + } + } + else + { + if (attr == EA_4BYTE) + { + ins = INS_sraw; + } + else + { + ins = INS_sra; + } + } + break; + + case GT_ROR: + _ASSERTE(!"TODO RISCV64 NYI"); + break; + + case GT_XOR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_xori; + } + else + { + ins = INS_xor; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - integer"); + unreached(); + break; + } + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. +// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + + BasicBlock* skipLabel = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); + + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); + emitter::EmitCallType callType; + regNumber callTarget; + + if (addr == nullptr) + { + callType = emitter::EC_INDIR_R; + callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + // TODO-RISCV64: maybe optimize further. + GetEmitter()->emitIns_I_la(EA_PTRSIZE, callTarget, (ssize_t)pAddr); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, callTarget, callTarget, 0); + } + regSet.verifyRegUsed(callTarget); + } + else + { + callType = emitter::EC_FUNC_TOKEN; + callTarget = REG_NA; + } + + // TODO-RISCV64: can optimize further !!! 
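+    // Illustrative annotation: together with the beq emitted above, the overall shape is roughly
+    //     beq   <dataReg>, zero, Lskip    // nothing to do when the GC poll word is clear
+    //     ...materialize the target and call CORINFO_HELP_STOP_FOR_GC...
+    // Lskip: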
+ GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + regSet.verifyRegistersUsed(killMask); + + genDefineTempLabel(skipLabel); +} + +//------------------------------------------------------------------------ +// genCodeForStoreInd: Produce code for a GT_STOREIND node. +// +// Arguments: +// tree - the GT_STOREIND node +// +void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +{ +#ifdef FEATURE_SIMD + // Storing Vector3 of size 12 bytes through indirection + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreIndTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(tree); + + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_WRITE_BARRIER_DST, + // as that is where 'addr' must go. + noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST); + + // 'addr' goes into REG_T6 (REG_WRITE_BARRIER_DST) + genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); + + // 'data' goes into REG_T7 (REG_WRITE_BARRIER_SRC) + genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); + + genGCWriteBarrier(tree, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately. + genConsumeAddress(addr); + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + regNumber dataReg; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_R0; + } + else // data is not contained, so evaluate it into a register + { + assert(!data->isContained()); + dataReg = data->GetRegNum(); + } + + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); + + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + // issue a full memory barrier before a volatile StInd + instGen_MemoryBarrier(); + } + + GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForSwap: Produce code for a GT_SWAP node. +// +// Arguments: +// tree - the GT_SWAP node +// +void CodeGen::genCodeForSwap(GenTreeOp* tree) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genIntToFloatCast: Generate code to cast an int/long to float/double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType= int32/uint32/int64/uint64 and DstType=float/double. 
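+// Notes (descriptive, added for clarity): the selection below maps to the RV64 F/D conversion
+//    forms; for example an int32 source with a double destination is expected to use fcvt.d.w,
+//    and an unsigned 64-bit source with a float destination fcvt.s.lu.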
+// +void CodeGen::genIntToFloatCast(GenTree* treeNode) +{ + // int type --> float/double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg. + + var_types dstType = treeNode->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE + emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); + noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); + + bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; + instruction ins = INS_invalid; + + if (IsUnsigned) + { + if (dstType == TYP_DOUBLE) + { + if (srcSize == EA_4BYTE) + { + ins = INS_fcvt_d_wu; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_fcvt_d_lu; + } + } + else + { + assert(dstType == TYP_FLOAT); + if (srcSize == EA_4BYTE) + { + ins = INS_fcvt_s_wu; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_fcvt_s_lu; + } + } + + } + else + { + if (dstType == TYP_DOUBLE) + { + if (srcSize == EA_4BYTE) + { + ins = INS_fcvt_d_w; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_fcvt_d_l; + } + } + else + { + assert(dstType == TYP_FLOAT); + if (srcSize == EA_4BYTE) + { + ins = INS_fcvt_s_w; + } + else + { + assert(srcSize == EA_8BYTE); + ins = INS_fcvt_s_l; + } + } + } + + genConsumeOperands(treeNode->AsOp()); + + GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->GetRegNum(), op1->GetRegNum()); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genFloatToIntCast: Generate code to cast float/double to int/long +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType=float/double and DstType= int32/uint32/int64/uint64 +// +void CodeGen::genFloatToIntCast(GenTree* treeNode) +{ + // int type --> float/double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidIntReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + assert(!op1->isContained()); // Cannot be contained + assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid int reg. 
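+    // Descriptive annotation: the fcvt.{w,wu,l,lu}.{s,d} forms chosen below do not trap on
+    // out-of-range inputs; on RISC-V they produce a clamped (saturated) result and raise the
+    // invalid flag instead, which is fine here because this path asserts !gtOverflow().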
+ + var_types dstType = treeNode->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); + + // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE + emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); + noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); + + bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; + instruction ins = INS_invalid; + + if (IsUnsigned) + { + if (srcType == TYP_DOUBLE) + { + if (dstSize == EA_4BYTE) + { + ins = INS_fcvt_wu_d; + } + else + { + assert(dstSize == EA_8BYTE); + ins = INS_fcvt_lu_d; + } + } + else + { + assert(srcType == TYP_FLOAT); + if (dstSize == EA_4BYTE) + { + ins = INS_fcvt_wu_s; + } + else + { + assert(dstSize == EA_8BYTE); + ins = INS_fcvt_lu_s; + } + } + + } + else + { + if (srcType == TYP_DOUBLE) + { + if (dstSize == EA_4BYTE) + { + ins = INS_fcvt_w_d; + } + else + { + assert(dstSize == EA_8BYTE); + ins = INS_fcvt_l_d; + } + } + else + { + assert(srcType == TYP_FLOAT); + if (dstSize == EA_4BYTE) + { + ins = INS_fcvt_w_s; + } + else + { + assert(dstSize == EA_8BYTE); + ins = INS_fcvt_l_s; + } + } + } + + genConsumeOperands(treeNode->AsOp()); + + GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->GetRegNum(), op1->GetRegNum()); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCkfinite: Generate code for ckfinite opcode. +// +// Arguments: +// treeNode - The GT_CKFINITE node +// +// Return Value: +// None. +// +// Assumptions: +// GT_CKFINITE node has reserved an internal register. +// +void CodeGen::genCkfinite(GenTree* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node. 
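+//
+// Notes (illustrative, added for clarity): RISC-V has no condition flags, so the 0/1 result is
+// materialized with slt/sltu/slti/sltiu plus an xori when the sense must be inverted, for example:
+//    a <  b   (signed)    ->  slt   rd, a, b
+//    a <= 7   (signed)    ->  slti  rd, a, 8            // compares against imm + 1
+//    a >= b   (unsigned)  ->  sltu  rd, a, b
+//                             xori  rd, rd, 1
+//    a == b               ->  xor   rd, a, b
+//                             sltiu rd, rd, 1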
+//
+// Arguments:
+//    tree - the node
+//
+void CodeGen::genCodeForCompare(GenTreeOp* jtree)
+{
+    emitter* emit = GetEmitter();
+
+    GenTreeOp* tree = nullptr;
+    regNumber targetReg;
+    genTreeOps op;
+    if (jtree->GetRegNum() == REG_NA)
+    {
+        tree = jtree;
+        targetReg = REG_RA;
+        assert(tree->GetRegNum() == REG_NA);
+    }
+    else
+    {
+        tree = jtree;
+        targetReg = tree->GetRegNum();
+    }
+    assert(targetReg != REG_NA);
+
+    GenTree* op1 = tree->gtOp1;
+    GenTree* op2 = tree->gtOp2;
+    var_types op1Type = genActualType(op1->TypeGet());
+    var_types op2Type = genActualType(op2->TypeGet());
+
+    assert(!op1->isUsedFromMemory());
+    assert(!op2->isUsedFromMemory());
+
+    genConsumeOperands(tree);
+
+    emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
+
+    assert(genTypeSize(op1Type) == genTypeSize(op2Type));
+
+    if (varTypeIsFloating(op1Type))
+    {
+        assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE));
+        NYI_RISCV64("genCodeForCompare not implemented on RISCV64 yet");
+    }
+    else
+    {
+        if (op1->isContainedIntOrIImmed())
+        {
+            op1 = tree->gtOp2;
+            op2 = tree->gtOp1;
+            switch (tree->OperGet())
+            {
+                case GT_LT:
+                    tree->SetOper(GT_GT);
+                    break;
+                case GT_LE:
+                    tree->SetOper(GT_GE);
+                    break;
+                case GT_GT:
+                    tree->SetOper(GT_LT);
+                    break;
+                case GT_GE:
+                    tree->SetOper(GT_LE);
+                    break;
+                default:
+                    break;
+            }
+        }
+        assert(!op1->isContainedIntOrIImmed());
+        assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE));
+
+        bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0;
+        regNumber regOp1 = op1->GetRegNum();
+
+        if (op2->isContainedIntOrIImmed())
+        {
+            ssize_t imm = op2->AsIntCon()->gtIconVal;
+
+            switch (cmpSize)
+            {
+                case EA_4BYTE:
+                    if (IsUnsigned)
+                    {
+                        imm = static_cast<uint32_t>(imm);
+
+                        regNumber tmpRegOp1 = rsGetRsvdReg();
+                        assert(regOp1 != tmpRegOp1);
+
+                        emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
+                        emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
+                        regOp1 = tmpRegOp1;
+                    }
+                    else
+                    {
+                        imm = static_cast<int32_t>(imm);
+                    }
+                    break;
+                case EA_8BYTE:
+                    break;
+                case EA_1BYTE:
+                    if (IsUnsigned)
+                    {
+                        imm = static_cast<uint8_t>(imm);
+                    }
+                    else
+                    {
+                        imm = static_cast<int8_t>(imm);
+                    }
+                    break;
+                // case EA_2BYTE:
+                //     if (IsUnsigned)
+                //     {
+                //         imm = static_cast<uint16_t>(imm);
+                //     }
+                //     else
+                //     {
+                //         imm = static_cast<int16_t>(imm);
+                //     }
+                //     break;
+                default:
+                    unreached();
+            }
+
+            if (tree->OperIs(GT_LT))
+            {
+                if (!IsUnsigned && emitter::isValidSimm12(imm))
+                {
+                    emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm);
+                }
+                else if (IsUnsigned && emitter::isValidUimm11(imm))
+                {
+                    emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm);
+                }
+                else
+                {
+                    emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm);
+                    emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
+                }
+            }
+            else if (tree->OperIs(GT_LE))
+            {
+                if (!IsUnsigned && emitter::isValidSimm12(imm + 1))
+                {
+                    emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1);
+                }
+                else if (IsUnsigned && emitter::isValidUimm11(imm + 1))
+                {
+                    emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1);
+                }
+                else
+                {
+                    emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1);
+                    emit->emitIns_R_R_R(IsUnsigned ?
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_GT)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) + { + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); + } + } + else if (tree->OperIs(GT_GE)) + { + if (!IsUnsigned && emitter::isValidSimm12(imm)) + { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && emitter::isValidUimm11(imm)) + { + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + if (!imm) + { + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); + } + else if (emitter::isValidUimm12(imm)) + { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + } + else if (tree->OperIs(GT_EQ)) + { + if (!imm) + { + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1); + } + else if (emitter::isValidUimm12(imm)) + { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + + if (cmpSize == EA_4BYTE) + { + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); + assert(regOp1 != tmpRegOp2); + assert(regOp2 != tmpRegOp2); + + if (IsUnsigned) + { + emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32); + emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32); + + emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32); + emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32); + } + else + { + emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0); + } + + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; + } + + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJcc: Generate code for a GT_JCC node. +// +// Arguments: +// jcc - The node +// +void CodeGen::genCodeForJcc(GenTreeCC* jcc) +{ + // TODO CHECK REGISTER ALLOCATION RSVD AND REG_RA + emitter* emit = GetEmitter(); + + instruction ins = INS_invalid; + + assert(jcc->OperIs(GT_JCC) && (instruction)jcc->GetRegNum() == INS_bnez); + emit->emitIns_J(INS_bnez, compiler->compCurBB->bbJumpDest, (int)(int64_t)REG_RA); + jcc->SetRegNum(REG_NA); +} + +//------------------------------------------------------------------------ +// genCodeForJumpCompare: Generates code for jmpCompare statement. +// +// A GT_JCMP node is created when a comparison and conditional branch +// can be executed in a single instruction. +// +// Arguments: +// tree - The GT_JCMP tree node. +// +// Return Value: +// None +// +void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) +{ + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + assert(tree->OperIs(GT_JCMP)); + assert(!varTypeIsFloating(tree)); + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + assert(op2->IsCnsIntOrI()); + assert(op2->isContained()); + + genConsumeOperands(tree); + + regNumber reg = op1->GetRegNum(); + emitAttr attr = emitActualTypeSize(op1->TypeGet()); + + instruction ins; + int regs; + ssize_t imm = op2->AsIntCon()->gtIconVal; + assert(reg != REG_T6); // TODO R21 => T6 + assert(reg != REG_RA); + + if (attr == EA_4BYTE) + { + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slliw, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; + } + + if (imm != 0) + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_T6, imm); + regs = (int)reg << 5; + regs |= (int)REG_T6; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; + } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; +} + +//--------------------------------------------------------------------- +// genSPtoFPdelta - return offset from the stack pointer (Initial-SP) to the frame pointer. The frame pointer +// will point to the saved frame pointer slot (i.e., there will be frame pointer chaining). +// +int CodeGenInterface::genSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + + int delta = compiler->lvaOutgoingArgSpaceSize; + + assert(delta >= 0); + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() const +{ + // For varargs functions, we home all the incoming register arguments. They are not + // included in the compCalleeRegsPushed count. 
This is like prespill on ARM32, but + // since we don't use "push" instructions to save them, we don't have to do the + // save of these varargs register arguments as the first thing in the prolog. + + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize > 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! + +int CodeGenInterface::genCallerSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. +// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() const +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} + +/***************************************************************************** + * Emit a call to a helper function. + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + + if (addr == nullptr) + { + // This is call to a runtime helper. + // lui reg, pAddr #NOTE: this maybe multi-instructions. + // ld reg, reg + // jalr reg + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + } + + regMaskTP callTargetMask = genRegMask(callTargetReg); + regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + // assert that all registers in callTargetMask are in the callKillSet + noway_assert((callTargetMask & callKillSet) == callTargetMask); + + callTarget = callTargetReg; + + if (compiler->opts.compReloc) + { + // TODO-RISCV64: here the jal is special flag rather than a real instruction. 
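+            // Descriptive annotation: with relocations this pseudo-jal is expected to expand into a
+            // pc-relative load of the helper's indirection cell; in the else branch below the 64-bit
+            // pAddr is instead materialized piecewise (lui/addi for the upper bits, then slli+addi
+            // chunks of the lower half), with the final ld fetching the helper address.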
+ GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + ssize_t high = (((ssize_t)pAddr) >> 32) & 0xffffffff; + GetEmitter()->emitIns_R_I(INS_lui, EA_PTRSIZE, callTarget, (((high + 0x800) >> 12) & 0xfffff)); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, callTarget, callTarget, (high & 0xfff)); + + ssize_t low = ((ssize_t)pAddr) & 0xffffffff; + GetEmitter()->emitIns_R_R_I(INS_slli, EA_PTRSIZE, callTarget, callTarget, 11); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, callTarget, callTarget, ((low >> 21) & 0x7ff)); + + GetEmitter()->emitIns_R_R_I(INS_slli, EA_PTRSIZE, callTarget, callTarget, 11); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, callTarget, callTarget, ((low >> 10) & 0x7ff)); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, callTarget, callTarget, (low & 0x3ff)); + } + regSet.verifyRegUsed(callTarget); + + callType = emitter::EC_INDIR_R; + } + + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, + retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + regSet.verifyRegistersUsed(killMask); +} + +#ifdef FEATURE_SIMD + +//------------------------------------------------------------------------ +// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main +// routine which in turn calls appropriate genSIMDIntrinsicXXX() routine. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// Currently, we only recognize SIMDVector and SIMDVector, and +// a limited set of methods. +// +// TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. +void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) +{ + NYI("unimplemented on RISCV64 yet"); + return INS_OPTS_NONE; +} + +// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic +// +// Arguments: +// intrinsicId - SIMD intrinsic Id +// baseType - Base type of the SIMD vector +// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode +// +// +// Return Value: +// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. +// +instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) +{ + NYI("unimplemented on RISCV64 yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------------------------- +// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes +// a number of arguments equal to the length of the Vector. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//---------------------------------------------------------------------------------- +// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// The Widen intrinsics are broken into separate intrinsics for the two results. +// +void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// This intrinsic takes two arguments. The first operand is narrowed to produce the +// lower elements of the results, and the second operand produces the high elements. +// +void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations +// add, sub, mul, bit-wise And, AndNot and Or. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator +// == and != +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. 
+// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// The upper half of all SIMD registers are volatile, even the callee-save registers. +// When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic +// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save +// register. If such a register cannot be found, it will save it to an available caller-save register. +// In that case, this node will be marked GTF_SPILL, which will cause this method to save +// the upper half to the lclVar's home location. +// +void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always +// have their home register, this node has its targetReg on the lclVar child, and its source +// on the simdNode. +// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled +// an upper-half to the lclVar's home location, this node will be marked GTF_SPILLED. +// +void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//----------------------------------------------------------------------------- +// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. +// Since Vector3 is not a hardware supported write size, it is performed +// as two writes: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store indirect +// +// +// Return Value: +// None. +// +void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//----------------------------------------------------------------------------- +// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value. +// Since Vector3 is not a hardware supported write size, it is performed +// as two loads: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node of GT_IND +// +// +// Return Value: +// None. +// +void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//----------------------------------------------------------------------------- +// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported write size, it is performed +// as two stores: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store TYP_SIMD12 field +// +// Return Value: +// None. +// +void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +#endif // FEATURE_SIMD + +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. 
+// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// Offset in bytes from SP to last probed address. +// +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +{ + NYI("unimplemented on RISCV64 yet"); + return 0; +} + +//------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. +// +// Preconditions: +// All operands have been evaluated. +// +void CodeGen::genCodeForTreeNode(GenTree* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + compiler->gtDispLIRNode(treeNode, "Generating: "); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. + if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. + assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { + case GT_START_NONGC: + GetEmitter()->emitDisableGC(); + break; + + case GT_START_PREEMPTGC: + // Kill callee saves GC registers, and create a label + // so that information gets propagated to the emitter. + gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); + genDefineTempLabel(genCreateTempLabel()); + break; + + case GT_PROF_HOOK: + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + +#ifdef PROFILING_SUPPORTED + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. 
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: + if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + { + treeNode->gtOper = GT_CNS_DBL; + } + FALLTHROUGH; + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NOT: + case GT_NEG: + genCodeForNegNot(treeNode); + break; + + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_UDIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + case GT_AND_NOT: + assert(varTypeIsIntegralOrI(treeNode)); + + FALLTHROUGH; + + case GT_ADD: + case GT_SUB: + case GT_MUL: + genConsumeOperands(treeNode->AsOp()); + genCodeForBinary(treeNode->AsOp()); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + case GT_ROL: + genCodeForShift(treeNode); + break; + + case GT_CAST: + genCodeForCast(treeNode->AsOp()); + break; + + case GT_BITCAST: + genCodeForBitCast(treeNode->AsOp()); + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + genCodeForLclAddr(treeNode->AsLclVarCommon()); + break; + + case GT_LCL_FLD: + genCodeForLclFld(treeNode->AsLclFld()); + break; + + case GT_LCL_VAR: + genCodeForLclVar(treeNode->AsLclVar()); + break; + + case GT_STORE_LCL_FLD: + genCodeForStoreLclFld(treeNode->AsLclFld()); + break; + + case GT_STORE_LCL_VAR: + genCodeForStoreLclVar(treeNode->AsLclVar()); + break; + + case GT_RETFILT: + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. + genLeaInstruction(treeNode->AsAddrMode()); + break; + + case GT_INDEX_ADDR: + genCodeForIndexAddr(treeNode->AsIndexAddr()); + break; + + case GT_IND: + genCodeForIndir(treeNode->AsIndir()); + break; + + case GT_INC_SATURATE: + genCodeForIncSaturate(treeNode); + break; + + case GT_MULHI: + genCodeForMulHi(treeNode->AsOp()); + break; + + case GT_SWAP: + genCodeForSwap(treeNode->AsOp()); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode->AsIntrinsic()); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + genHWIntrinsic(treeNode->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_CMP: + if (treeNode->GetRegNum() != REG_NA) + { + genCodeForCompare(treeNode->AsOp()); + } + else + { + GenTree* treeNode_next = treeNode->gtNext; + while (treeNode_next) + { + if (treeNode_next->OperIs(GT_JCC)) + { + break; + } + treeNode_next = treeNode_next->gtNext; + }; + assert(treeNode_next->OperIs(GT_JCC)); + treeNode_next->SetRegNum((regNumber)INS_bnez); + // Revert JCC Comparison. TODO NEED TO UPDATE LATER. 
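+                // Descriptive annotation: a compare with no target register defers its result to the
+                // following GT_JCC. The branch node is tagged (its reg field temporarily carries
+                // INS_bnez), the JCC condition is mapped back onto this compare's oper, and
+                // genCodeForCompare then writes the 0/1 result into REG_RA for genCodeForJcc to test
+                // with bnez.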
+ GenTreeCC* jcc = treeNode_next->AsCC(); + unsigned code = jcc->gtCondition.GetCode(); + static constexpr genTreeOps s_gtopers[] = {GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT}; + assert((code & 0x7) <= 0x5); + treeNode->SetOper(s_gtopers[code & 0x7]); + genCodeForCompare(treeNode->AsOp()); + } + break; + + case GT_JCC: + genCodeForJcc(treeNode->AsCC()); + break; + + case GT_JCMP: + genCodeForJumpCompare(treeNode->AsOp()); + break; + + case GT_RETURNTRAP: + genCodeForReturnTrap(treeNode->AsOp()); + break; + + case GT_STOREIND: + genCodeForStoreInd(treeNode->AsStoreInd()); + break; + + case GT_COPY: + // This is handled at the time we call genConsumeReg() on the GT_COPY + break; + + case GT_FIELD_LIST: + // Should always be marked contained. + assert(!"LIST, FIELD_LIST nodes should always be marked contained."); + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + genPutArgReg(treeNode->AsOp()); + break; + + case GT_PUTARG_SPLIT: + genPutArgSplit(treeNode->AsPutArgSplit()); + break; + + case GT_CALL: + genCall(treeNode->AsCall()); + break; + + case GT_MEMORYBARRIER: + { + CodeGen::BarrierKind barrierKind = + treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; + + instGen_MemoryBarrier(barrierKind); + break; + } + + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode->AsOp()); + break; + + case GT_CMPXCHG: + genCodeForCmpXchg(treeNode->AsCmpXchg()); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_KEEPALIVE: + if (treeNode->AsOp()->gtOp1->isContained()) + { + // For this case we simply need to update the lifetime of the local. + genUpdateLife(treeNode->AsOp()->gtOp1); + } + else + { + genConsumeReg(treeNode->AsOp()->gtOp1); + } + break; + + case GT_NO_OP: + instGen(INS_nop); + break; + + case GT_BOUNDS_CHECK: + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + genCodeForPhysReg(treeNode->AsPhysReg()); + break; + + case GT_NULLCHECK: + genCodeForNullCheck(treeNode->AsIndir()); + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. */ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + +// the runtime side requires the codegen here to be consistent +#ifdef PSEUDORANDOM_NOP_INSERTION + emit->emitDisableRandomNops(); +#endif // PSEUDORANDOM_NOP_INSERTION + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + emit->emitIns_R_L(INS_ld, EA_PTRSIZE, genPendingCallLabel, targetReg); + break; + + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. 
+ break; + + default: + { +#ifdef DEBUG + char message[256]; + _snprintf_s(message, ArrLen(message), _TRUNCATE, "NYI: Unimplemented node type %s", + GenTree::OpName(treeNode->OperGet())); + NYIRAW(message); +#else + NYI("unimplemented node"); +#endif + } + break; + } +} + +//--------------------------------------------------------------------- +// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. +// +// Arguments: +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->getNeedsGSSecurityCookie()) + { + return; + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + noway_assert(compiler->gsGlobalSecurityCookieVal != 0); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, compiler->gsGlobalSecurityCookieVal); + + GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, initReg, REG_NA, compiler->lvaGSSecurityCookie, 0); + } + else + { + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_jalr, EA_PTR_DSP_RELOC, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, ((size_t)compiler->gsGlobalSecurityCookieAddr)); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, initReg, initReg, 0); + } + regSet.verifyRegUsed(initReg); + GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, initReg, REG_NA, compiler->lvaGSSecurityCookie, 0); + } + + *pInitRegZeroed = false; +} + +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). + if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + { + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + } + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. + // We don't have any IR node representing this check, so LSRA can't communicate registers + // for us to use. + + regNumber regGSConst = REG_GSCOOKIE_TMP_0; + regNumber regGSValue = REG_GSCOOKIE_TMP_1; + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + // load the GS cookie constant into a reg + // + instGen_Set_Reg_To_Imm(EA_PTRSIZE, regGSConst, compiler->gsGlobalSecurityCookieVal); + } + else + { + //// Ngen case - GS cookie constant needs to be accessed through an indirection. + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { + // TODO-RISCV64: maybe optimize furtherk! 
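+            // Descriptive annotation: the sequence below materializes the 64-bit cookie address
+            // piecewise (upper 32 bits via lui/addi, then chunks of the low half via slli+addi),
+            // with the final ld's displacement covering the last few bits, so the cookie can be
+            // read without a relocation.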
+ UINT32 high = ((ssize_t)compiler->gsGlobalSecurityCookieAddr) >> 32; + if (((high + 0x800) >> 12) != 0) + { + GetEmitter()->emitIns_R_I(INS_lui, EA_PTRSIZE, regGSConst, + (((high + 0x800) >> 12) & 0xfffff)); + } + if ((high & 0xFFF) != 0) + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regGSConst, + REG_R0, (high & 0xfff)); + } + UINT32 low = ((ssize_t)compiler->gsGlobalSecurityCookieAddr) & 0xffffffff; + GetEmitter()->emitIns_R_R_I(INS_slli, EA_PTRSIZE, regGSConst, + regGSConst, 11); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regGSConst, + regGSConst, (low >> 21) & 0x7FF); + GetEmitter()->emitIns_R_R_I(INS_slli, EA_PTRSIZE, regGSConst, + regGSConst, 11); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regGSConst, + regGSConst, (low >> 10) & 0x7FF); + GetEmitter()->emitIns_R_R_I(INS_slli, EA_PTRSIZE, regGSConst, + regGSConst, 10); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, regGSConst, + regGSConst, low & 0x3FF); + } + regSet.verifyRegUsed(regGSConst); + } + // Load this method's GS value from the stack frame + GetEmitter()->emitIns_R_S(INS_ld, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); + + // Compare with the GC cookie constant + BasicBlock* gsCheckBlk = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); + + // regGSConst and regGSValue aren't needed anymore, we can use them for helper call + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); + genDefineTempLabel(gsCheckBlk); +} + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for a GT_PUTARG_STK node +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_STK)); + GenTree* source = treeNode->gtOp1; + var_types targetType = genActualType(source->TypeGet()); + emitter* emit = GetEmitter(); + + // This is the varNum for our store operations, + // typically this is the varNum for the Outgoing arg space + // When we are generating a tail call it will be the varNum for arg0 + unsigned varNumOut = (unsigned)-1; + unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks + + // Get argument offset to use with 'varNumOut' + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. + unsigned argOffsetOut = treeNode->getArgOffset(); + + // Whether to setup stk arg in incoming or out-going arg area? + // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. + // All other calls - stk arg is setup in out-going arg area. + if (treeNode->putInIncomingArgArea()) + { + varNumOut = getFirstArgWithStackSlot(); + argOffsetMax = compiler->compArgSize; +#if FEATURE_FASTTAILCALL + // This must be a fast tail call. + assert(treeNode->gtCall->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. 
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]); + assert(varDsc != nullptr); +#endif // FEATURE_FASTTAILCALL + } + else + { + varNumOut = compiler->lvaOutgoingArgSpaceVar; + argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + } + + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); + + if (!isStruct) // a normal non-Struct argument + { + if (varTypeIsSIMD(targetType)) + { + NYI("unimplemented on RISCV64 yet"); + } + + instruction storeIns = ins_Store(targetType); + emitAttr storeAttr = emitTypeSize(targetType); + + // If it is contained then source must be the integer constant zero + if (source->isContained()) + { + assert(source->OperGet() == GT_CNS_INT); + assert(source->AsIntConCommon()->IconValue() == 0); + + emit->emitIns_S_R(storeIns, storeAttr, REG_R0, REG_NA, varNumOut, argOffsetOut); + } + else + { + genConsumeReg(source); + if (storeIns == INS_sw) + { + emit->emitIns_R_R_R(INS_addw, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0); + storeIns = INS_sd; + storeAttr = EA_8BYTE; + } + emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), REG_NA, varNumOut, argOffsetOut); + } + argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing area + } + else // We have some kind of a struct argument + { + assert(source->isContained()); // We expect that this node was marked as contained in Lower + + if (source->OperGet() == GT_FIELD_LIST) + { + genPutArgStkFieldList(treeNode, varNumOut); + } + else // We must have a GT_OBJ or a GT_LCL_VAR + { + noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); + + var_types targetType = source->TypeGet(); + noway_assert(varTypeIsStruct(targetType)); + + // Setup loReg from the internal registers that we reserved in lower. 
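+            // A sketch of the copy performed below (assuming the usual pointer-aligned
+            // struct): each pointer-sized slot is loaded into loReg and stored to the
+            // outgoing area, and any 4/2/1-byte tail uses correspondingly narrower loads
+            // and stores.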
+ // + regNumber loReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + if (source->OperGet() == GT_LCL_VAR) + { + varNode = source->AsLclVarCommon(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + else // addrNode is used + { + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + } + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + ClassLayout* layout; + + // unsigned gcPtrCount; // The count of GC pointers in the struct + unsigned srcSize; + + // gcPtrCount = treeNode->gtNumSlots; + // Setup the srcSize and layout + if (source->OperGet() == GT_LCL_VAR) + { + assert(varNode != nullptr); + LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine + // as that is how much stack is allocated for this LclVar + layout = varDsc->GetLayout(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + // If the source is an OBJ node then we need to use the type information + // it provides (size and GC layout) even if the node wraps a lclvar. Due + // to struct reinterpretation (e.g. Unsafe.As) it is possible that + // the OBJ node has a different type than the lclvar. + CORINFO_CLASS_HANDLE objClass = source->AsObj()->GetLayout()->GetClassHandle(); + + srcSize = compiler->info.compCompHnd->getClassSize(objClass); + layout = source->AsObj()->GetLayout(); + } + + unsigned structSize; + + unsigned dstSize = treeNode->GetStackByteSize(); + if (dstSize != srcSize) + { + // We can generate a smaller code if store size is a multiple of TARGET_POINTER_SIZE. + // The dst size can be rounded up to PUTARG_STK size. + // The src size can be rounded up if it reads a local variable slot because the local + // variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE. + // The exception is arm64 apple arguments because they can be passed without padding. + if (varNode != nullptr) + { + // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size. + const LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); + const unsigned varStackSize = varDsc->lvSize(); + if (varStackSize >= srcSize) + { + srcSize = varStackSize; + } + } + } + if (dstSize == srcSize) + { + structSize = dstSize; + } + else + { + // With Unsafe object wwe can have different strange combinations: + // PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes; + // PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local. 
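+                // min(dstSize, srcSize) keeps the store within the outgoing slot; as the
+                // second example above notes, the load side may still read past a smaller
+                // reinterpreted local.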
+ structSize = min(dstSize, srcSize); + } + + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; + + while (remainingSize > 0) + { + nextIndex = structOffset / TARGET_POINTER_SIZE; + + var_types type; + if (remainingSize >= TARGET_POINTER_SIZE) + { + type = layout->GetGCPtrType(nextIndex); + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + // the left over size is smaller than a pointer and thus can never be a GC type + assert(!layout->IsGCPtr(nextIndex)); + + if (remainingSize >= 4) + { + type = TYP_INT; + } + else if (remainingSize >= 2) + { + type = TYP_USHORT; + } + else + { + assert(remainingSize == 1); + type = TYP_UBYTE; + } + } + + const emitAttr attr = emitTypeSize(type); + const unsigned moveSize = genTypeSize(type); + assert(EA_SIZE_IN_BYTES(attr) == moveSize); + + remainingSize -= moveSize; + + instruction loadIns = ins_Load(type); + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset); + } + else + { + assert(loReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(loadIns, attr, loReg, addrReg, structOffset); + } + + // Emit a store instruction to store the register into the outgoing argument area + instruction storeIns = ins_Store(type); + emit->emitIns_S_R(storeIns, attr, loReg, REG_NA, varNumOut, argOffsetOut); + argOffsetOut += moveSize; + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + + structOffset += moveSize; + } + } + } +} + + +//--------------------------------------------------------------------- +// genPutArgReg - generate code for a T_PUTARG_REG node +// +// Arguments +// tree - the GT_PUTARG_REG node +// +// Return value: +// None +// +void CodeGen::genPutArgReg(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_PUTARG_REG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + assert(targetType != TYP_STRUCT); + + GenTree* op1 = tree->gtOp1; + genConsumeReg(op1); + + // If child node is not already in the register we need, move it + if (targetReg != op1->GetRegNum()) + { + if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + else if (emitter::isFloatReg(targetReg)) + { + GetEmitter()->emitIns_R_R(INS_fcvt_d_l, EA_8BYTE, targetReg, op1->GetRegNum()); + } + else + { + assert(!emitter::isFloatReg(targetReg)); + GetEmitter()->emitIns_R_R(INS_fcvt_l_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } + } + genProduceReg(tree); +} + +//--------------------------------------------------------------------- +// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node +// +// Arguments +// tree - the GT_PUTARG_SPLIT node +// +// Return value: +// None +// +void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genRangeCheck: generate code for GT_BOUNDS_CHECK node. 
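+//
+// Notes:
+//    Illustrative shape of what is emitted (one unsigned compare-and-branch to the
+//    range-check throw helper block):
+//        bgeu  index, length, RngChkFail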
+// +void CodeGen::genRangeCheck(GenTree* oper) +{ + noway_assert(oper->OperIs(GT_BOUNDS_CHECK)); + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTree* arrLen = bndsChk->GetArrayLength(); + GenTree* arrIndex = bndsChk->GetIndex(); + GenTree* arrRef = NULL; + int lenOffset = 0; + + GenTree* src1; + GenTree* src2; + regNumber reg1; + regNumber reg2; + emitJumpKind jmpKind = EJ_jmp; + + genConsumeRegs(arrIndex); + genConsumeRegs(arrLen); + + emitter* emit = GetEmitter(); + GenTreeIntConCommon* intConst = nullptr; + if (arrIndex->isContainedIntOrIImmed()) + { + src1 = arrLen; + src2 = arrIndex; + reg1 = REG_RA; // TODO CHECK R21 => RA + reg2 = src1->GetRegNum(); + + intConst = src2->AsIntConCommon(); + ssize_t imm = intConst->IconValue(); + if (imm == INT64_MAX) + { + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_RA, REG_R0, -1); // TODO CHECK R21 => RA + emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, REG_RA, REG_RA, 1); // TODO CHECK R21 => RA + } + else + { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); // TODO CHECK R21 => RA + } + } + else + { + src1 = arrIndex; + src2 = arrLen; + reg1 = src1->GetRegNum(); + + if (src2->isContainedIntOrIImmed()) + { + reg2 = REG_RA; // TODO CHECK R21 => RA + ssize_t imm = src2->AsIntConCommon()->IconValue(); + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); // TODO CHECK R21 => RA + } + else + { + reg2 = src2->GetRegNum(); + } + } + +#ifdef DEBUG + var_types bndsChkType = genActualType(src2->TypeGet()); + var_types src1ChkType = genActualType(src1->TypeGet()); + // Bounds checks can only be 32 or 64 bit sized comparisons. + assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); + assert(src1ChkType == TYP_INT || src1ChkType == TYP_LONG); +#endif // DEBUG + + genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, bndsChk->gtIndRngFailBB, reg2); +} + +//--------------------------------------------------------------------- +// genCodeForPhysReg - generate code for a GT_PHYSREG node +// +// Arguments +// tree - the GT_PHYSREG node +// +// Return value: +// None +// +void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) +{ + assert(tree->OperIs(GT_PHYSREG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + if (targetReg != tree->gtSrcReg) + { + inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); + genTransferRegGCState(targetReg, tree->gtSrcReg); + } + + genProduceReg(tree); +} + +//--------------------------------------------------------------------- +// genCodeForNullCheck - generate code for a GT_NULLCHECK node +// +// Arguments +// tree - the GT_NULLCHECK node +// +// Return value: +// None +// +void CodeGen::genCodeForNullCheck(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_NULLCHECK)); + + genConsumeRegs(tree->gtOp1); + + GetEmitter()->emitInsLoadStoreOp(ins_Load(tree->TypeGet()), emitActualTypeSize(tree), REG_R0, tree); +} + +//------------------------------------------------------------------------ +// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, +// producing the effective index by subtracting the lower bound. +// +// Arguments: +// arrIndex - the node for which we're generating code +// +// Return Value: +// None. 
+// +void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genCodeForArrOffset: Generates code to compute the flattened array offset for +// one dimension of an array reference: +// result = (prevDimOffset * dimSize) + effectiveIndex +// where dimSize is obtained from the arrObj operand +// +// Arguments: +// arrOffset - the node for which we're generating code +// +// Return Value: +// None. +// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// +void CodeGen::genCodeForShift(GenTree* tree) +{ + emitAttr size = emitActualTypeSize(tree); + + assert(tree->GetRegNum() != REG_NA); + + genConsumeOperands(tree->AsOp()); + + GenTree* operand = tree->gtGetOp1(); + GenTree* shiftBy = tree->gtGetOp2(); + + if (tree->OperIs(GT_ROR, GT_ROL)) + { + unsigned immWidth = emitter::getBitWidth(size); // For RISCV64, immWidth will be set to 32 or 64 + if (!shiftBy->IsCnsIntOrI()) + { + regNumber shiftRight = tree->OperIs(GT_ROR) ? shiftBy->GetRegNum() : REG_RA; + regNumber shiftLeft = tree->OperIs(GT_ROR) ? REG_RA : shiftBy->GetRegNum(); + GetEmitter()->emitIns_R_R_I(INS_addi, size, REG_RA, REG_R0, immWidth); + GetEmitter()->emitIns_R_R_R(INS_sub, size, REG_RA, REG_RA, shiftBy->GetRegNum()); + if (size == EA_8BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_srl, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_R(INS_sll, size, REG_RA, operand->GetRegNum(), shiftLeft); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_srlw, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_R(INS_sllw, size, REG_RA, operand->GetRegNum(), shiftLeft); + } + } + else + { + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + if (shiftByImm >= 32 && shiftByImm < 64) + { + immWidth = 64; + } + unsigned shiftRight = tree->OperIs(GT_ROR) ? shiftByImm : immWidth - shiftByImm; + unsigned shiftLeft = tree->OperIs(GT_ROR) ? 
immWidth - shiftByImm : shiftByImm; + if ((shiftByImm >= 32 && shiftByImm < 64) || size == EA_8BYTE) + { + GetEmitter()->emitIns_R_R_I(INS_srli, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_I(INS_slli, size, REG_RA, operand->GetRegNum(), shiftLeft); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_srliw, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_I(INS_slliw, size, REG_RA, operand->GetRegNum(), shiftLeft); + } + } + GetEmitter()->emitIns_R_R_R(INS_or, size, tree->GetRegNum(), tree->GetRegNum(), REG_RA); + } + else { + if (!shiftBy->IsCnsIntOrI()) + { + instruction ins = genGetInsForOper(tree); + GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftBy->GetRegNum()); + } + else + { + instruction ins = genGetInsForOper(tree); + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + + // should check shiftByImm for riscv64-ins. + unsigned immWidth = emitter::getBitWidth(size); // For RISCV64, immWidth will be set to 32 or 64 + shiftByImm &= (immWidth - 1); + + if (ins == INS_slliw && shiftByImm >= 32) + { + ins = INS_slli; + } + else if (ins == INS_slli && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_slli; + } + else if (ins == INS_srai && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srai; + } + else if (ins == INS_srli && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srli; + } + GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm); + } + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. +// +// Arguments: +// tree - the node. +// +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) +{ + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); + + // Address of a local var. + noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); + + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); + + genProduceReg(lclAddrNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclFld: Produce code for a GT_LCL_FLD node. +// +// Arguments: +// tree - the GT_LCL_FLD node +// +void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + + NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported"); + assert(targetReg != REG_NA); + + emitAttr size = emitTypeSize(targetType); + unsigned offs = tree->GetLclOffs(); + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + + emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genScaledAdd: A helper for `dest = base + (index << scale)` +// and maybe optimize the instruction(s) for this operation. +// +void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale) +{ + assert((scale >> 5) == 0); + emitter* emit = GetEmitter(); + if (scale == 0) + { + instruction ins = attr == EA_4BYTE ? 
INS_addw : INS_add; + // target = base + index + emit->emitIns_R_R_R(ins, attr, targetReg, baseReg, indexReg); + } + else + { + instruction ins; + instruction ins2; + if (attr == EA_4BYTE) + { + ins = INS_slliw; + ins2 = INS_addw; + } + else + { + ins = INS_slli; + ins2 = INS_add; + } + + // target = base + index << scale + emit->emitIns_R_R_I(ins, attr, REG_RA, indexReg, scale); // TODO CHECK SIDE EFFECT WHEN R21 => RA + emit->emitIns_R_R_R(ins2, attr, targetReg, baseReg, REG_RA); + } +} + +//------------------------------------------------------------------------ +// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node. +// +// Arguments: +// tree - the GT_INDEX_ADDR node +// +void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) +{ + GenTree* const base = node->Arr(); + GenTree* const index = node->Index(); + + genConsumeReg(base); + genConsumeReg(index); + + // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the + // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(base->GetRegNum(), base->TypeGet()); + assert(!varTypeIsGC(index->TypeGet())); + + // The index is never contained, even if it is a constant. + assert(index->isUsedFromReg()); + + // Generate the bounds check if necessary. + if (node->IsBoundsChecked()) + { + GetEmitter()->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_RA, base->GetRegNum(), node->gtLenOffset); // TODO CHECK SIDE EFFECT WHEN R21 => RA + // if (index >= REG_RA) + // { + // JumpToThrowHlpBlk; + // } + // + // sltu REG_RA, index, REG_RA + // bne REG_RA, zero, RngChkExit + // IndRngFail: + // ... + // RngChkExit: + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_RA); + } + + emitAttr attr = emitActualTypeSize(node); + // Can we use a shift instruction for multiply ? 
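+    // Rough shape of the two paths below (temp register illustrative):
+    //   power-of-two element size:  slli t, index, scale;  add dest, base, t
+    //   otherwise:                  li t, elemSize;  mul t, index, t;  add dest, t, base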
+ // + if (isPow2(node->gtElemSize)) + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); + + // dest = base + (index << scale) + if (node->gtElemSize <= 64) + { + genScaledAdd(attr, node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale); + } + else + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_RA, scale); // TODO CHECK SIDE EFFECT WHEN R21 => RA + + instruction ins; + instruction ins2; + if (attr == EA_4BYTE) + { + ins = INS_sllw; + ins2 = INS_addw; + } + else + { + ins = INS_sll; + ins2 = INS_add; + } + GetEmitter()->emitIns_R_R_R(ins, attr, REG_RA, index->GetRegNum(), REG_RA); // TODO CHECK SIDE EFFECT WHEN R21 => RA + GetEmitter()->emitIns_R_R_R(ins2, attr, node->GetRegNum(), REG_RA, base->GetRegNum()); // TODO CHECK SIDE EFFECT WHEN R21 => RA + } + } + else // we have to load the element size and use a MADD (multiply-add) instruction + { + // REG_RA = element size + instGen_Set_Reg_To_Imm(EA_4BYTE, REG_RA, (ssize_t)node->gtElemSize); // TODO CHECK SIDE EFFECT WHEN R21 => RA + + // dest = index * REG_RA + base + instruction ins; + instruction ins2; + if (attr == EA_4BYTE) + { + ins = INS_mulw; + ins2 = INS_addw; + } + else + { + ins = INS_mul; + ins2 = INS_add; + } + GetEmitter()->emitIns_R_R_R(ins, EA_PTRSIZE, REG_RA, index->GetRegNum(), REG_RA); // TODO CHECK SIDE EFFECT WHEN R21 => RA + GetEmitter()->emitIns_R_R_R(ins2, attr, node->GetRegNum(), REG_RA, base->GetRegNum()); // TODO CHECK SIDE EFFECT WHEN R21 => RA + } + + // dest = dest + elemOffs + GetEmitter()->emitIns_R_R_I(INS_addi, attr, node->GetRegNum(), node->GetRegNum(), node->gtElemOffset); + + gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + + genProduceReg(node); +} + +//------------------------------------------------------------------------ +// genCodeForIndir: Produce code for a GT_IND node. +// +// Arguments: +// tree - the GT_IND node +// +void CodeGen::genCodeForIndir(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_IND)); + +#ifdef FEATURE_SIMD + // Handling of Vector3 type values loaded through indirection. + if (tree->TypeGet() == TYP_SIMD12) + { + genLoadIndTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + var_types type = tree->TypeGet(); + instruction ins = ins_Load(type); + instruction ins2 = INS_none; + regNumber targetReg = tree->GetRegNum(); + regNumber tmpReg = targetReg; + emitAttr attr = emitActualTypeSize(type); + int offset = 0; + + genConsumeAddress(tree->Addr()); + + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + instGen_MemoryBarrier(BARRIER_FULL); + } + + GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree); + + genProduceReg(tree); +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call +// +// Arguments: +// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll +// +// Arguments: +// cpBlkNode - Copy block node +// +// Return Value: +// None +// +// Assumption: +// The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes. 
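+//
+// Notes:
+//    Sketch of the unrolled copy: while at least 16 bytes remain, two 8-byte words are
+//    copied per step through a pair of temporaries (ld/ld then sd/sd); the remainder is
+//    copied with progressively narrower loads and stores (ld, lw, lh, lb).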
+//
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+    assert(cpBlkNode->OperIs(GT_STORE_BLK));
+
+    unsigned  dstLclNum      = BAD_VAR_NUM;
+    regNumber dstAddrBaseReg = REG_NA;
+    int       dstOffset      = 0;
+    GenTree*  dstAddr        = cpBlkNode->Addr();
+
+    if (!dstAddr->isContained())
+    {
+        dstAddrBaseReg = genConsumeReg(dstAddr);
+    }
+    else if (dstAddr->OperIsAddrMode())
+    {
+        assert(!dstAddr->AsAddrMode()->HasIndex());
+
+        dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base());
+        dstOffset      = dstAddr->AsAddrMode()->Offset();
+    }
+    else
+    {
+        assert(dstAddr->OperIsLocalAddr());
+        dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
+        dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs();
+    }
+
+    unsigned  srcLclNum      = BAD_VAR_NUM;
+    regNumber srcAddrBaseReg = REG_NA;
+    int       srcOffset      = 0;
+    GenTree*  src            = cpBlkNode->Data();
+
+    assert(src->isContained());
+
+    if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+    {
+        srcLclNum = src->AsLclVarCommon()->GetLclNum();
+        srcOffset = src->AsLclVarCommon()->GetLclOffs();
+    }
+    else
+    {
+        assert(src->OperIs(GT_IND));
+        GenTree* srcAddr = src->AsIndir()->Addr();
+
+        if (!srcAddr->isContained())
+        {
+            srcAddrBaseReg = genConsumeReg(srcAddr);
+        }
+        else if (srcAddr->OperIsAddrMode())
+        {
+            srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base());
+            srcOffset      = srcAddr->AsAddrMode()->Offset();
+        }
+        else
+        {
+            assert(srcAddr->OperIsLocalAddr());
+            srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum();
+            srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs();
+        }
+    }
+
+    if (cpBlkNode->IsVolatile())
+    {
+        // issue a full memory barrier before a volatile CpBlk operation
+        instGen_MemoryBarrier();
+    }
+
+    emitter* emit = GetEmitter();
+    unsigned size = cpBlkNode->GetLayout()->GetSize();
+
+    assert(size <= INT32_MAX);
+    assert(srcOffset < INT32_MAX - static_cast<int>(size));
+    assert(dstOffset < INT32_MAX - static_cast<int>(size));
+
+    regNumber tempReg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
+
+    if (size >= 2 * REGSIZE_BYTES)
+    {
+        regNumber tempReg2 = REG_RA; // TODO REG_R21 => REG_RA
+
+        for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize;
+             size -= regSize, srcOffset += regSize, dstOffset += regSize)
+        {
+            if (srcLclNum != BAD_VAR_NUM)
+            {
+                emit->emitIns_R_S(INS_ld, EA_8BYTE, tempReg, srcLclNum, srcOffset);
+                emit->emitIns_R_S(INS_ld, EA_8BYTE, tempReg2, srcLclNum, srcOffset + 8);
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tempReg, srcAddrBaseReg, srcOffset);
+                emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tempReg2, srcAddrBaseReg, srcOffset + 8);
+            }
+
+            if (dstLclNum != BAD_VAR_NUM)
+            {
+                emit->emitIns_S_R(INS_sd, EA_8BYTE, tempReg, REG_NA, dstLclNum, dstOffset);
+                emit->emitIns_S_R(INS_sd, EA_8BYTE, tempReg2, REG_NA, dstLclNum, dstOffset + 8);
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tempReg, dstAddrBaseReg, dstOffset);
+                emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tempReg2, dstAddrBaseReg, dstOffset + 8);
+            }
+        }
+    }
+
+    for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize)
+    {
+        while (regSize > size)
+        {
+            regSize /= 2;
+        }
+
+        instruction loadIns;
+        instruction storeIns;
+        emitAttr    attr;
+
+        switch (regSize)
+        {
+            case 1:
+                loadIns  = INS_lb;
+                storeIns = INS_sb;
+                attr     = EA_4BYTE;
+                break;
+            case 2:
+                loadIns  = INS_lh;
+                storeIns = INS_sh;
+                attr     = EA_4BYTE;
+                break;
+            case 4:
+                loadIns  = INS_lw;
+                storeIns = INS_sw;
+                attr     = EA_ATTR(regSize);
+                break;
+            case 8:
+                loadIns  = INS_ld;
+                storeIns = INS_sd;
+                attr     = EA_ATTR(regSize);
+                break;
+            default:
+                unreached();
+        }
+
+        if (srcLclNum !=
BAD_VAR_NUM) + { + emit->emitIns_R_S(loadIns, attr, tempReg, srcLclNum, srcOffset); + } + else + { + emit->emitIns_R_R_I(loadIns, attr, tempReg, srcAddrBaseReg, srcOffset); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, tempReg, REG_NA, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, tempReg, dstAddrBaseReg, dstOffset); + } + } + + if (cpBlkNode->IsVolatile()) + { + // issue a load barrier after a volatile CpBlk operation + instGen_MemoryBarrier(BARRIER_LOAD_ONLY); + } +} + +//------------------------------------------------------------------------ +// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call +// +// Arguments: +// initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) +{ + // Size goes in arg2, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. + genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + if (initBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); +} + +// Generate code for a load from some address + offset +// base: tree node which can be either a local address or arbitrary node +// offset: distance from the base from which to load +void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genCall: Produce code for a GT_CALL node +// +void CodeGen::genCall(GenTreeCall* call) +{ + // Consume all the arg regs + for (CallArg& arg : call->gtArgs.LateArgs()) + { + CallArgABIInformation& abiInfo = arg.AbiInfo; + GenTree* argNode = arg.GetLateNode(); + + // GT_RELOAD/GT_COPY use the child node + argNode = argNode->gtSkipReloadOrCopy(); + + if (abiInfo.GetRegNum() == REG_STK) + { + continue; + } + + // Deal with multi register passed struct args. + if (argNode->OperGet() == GT_FIELD_LIST) + { + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { + GenTree* putArgRegNode = use.GetNode(); + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + + genConsumeReg(putArgRegNode); + } + } + else if (abiInfo.IsSplit()) + { + assert(compFeatureArgSplit()); + + GenTreePutArgSplit* splitNode = argNode->AsPutArgSplit(); + genConsumeArgSplitStruct(splitNode); + + regNumber argReg = abiInfo.GetRegNum(); + regNumber allocReg = splitNode->GetRegNumByIdx(0); + var_types regType = splitNode->GetRegType(0); + + // For LA64's ABI, the split is only using the A7 and stack for passing arg. + assert(argReg == REG_A7); + assert(emitter::isGeneralRegister(allocReg)); + assert(abiInfo.NumRegs == 1); + + inst_Mov(regType, argReg, allocReg, /* canSkip */ true); + } + else + { + regNumber argReg = abiInfo.GetRegNum(); + genConsumeReg(argNode); + var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : argNode->TypeGet(); + inst_Mov(dstType, argReg, argNode->GetRegNum(), /* canSkip */ true); + } + } + + // Insert a null check on "this" pointer if asked. 
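+    // (The check below is just a word load from [this + 0] into the zero register; a fault
+    // on that load is turned into the NullReferenceException.)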
+ if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + + GetEmitter()->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, regThis, 0); + } + + // If fast tail call, then we are done here, we just have to load the call + // target into the right registers. We ensure in RA that target is loaded + // into a volatile register that won't be restored by epilog sequence. + if (call->IsFastTailCall()) + { + GenTree* target = getCallTarget(call, nullptr); + + if (target != nullptr) + { + // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. + genConsumeReg(target); + } +#ifdef FEATURE_READYTORUN + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || + ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); + assert(call->gtControlExpr == nullptr); + + regNumber tmpReg = call->GetSingleTempReg(); + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == tmpReg); + + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // We will use this again when emitting the jump in genCallInstruction in the epilog + call->gtRsvdRegs |= genRegMask(tmpReg); + } +#endif + + return; + } + + // For a pinvoke to unmanaged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (compiler->killGCRefs(call)) + { + genDefineTempLabel(genCreateTempLabel()); + } + + genCallInstruction(call); + + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + genDefineInlineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + +#ifdef DEBUG + // We should not have GC pointers in killed registers live around the call. + // GC info for arg registers were cleared when consuming arg nodes above + // and LSRA should ensure it for other trashed registers. + regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. 
+ for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); + } + } + else + { + if (varTypeUsesFloatArgReg(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->GetRegNum() != returnReg) + { + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +//------------------------------------------------------------------------ +// genCallInstruction - Generate instructions necessary to transfer control to the call. +// +// Arguments: +// call - the GT_CALL node +// +// Remaks: +// For tailcalls this function will generate a jump. +// +void CodeGen::genCallInstruction(GenTreeCall* call) +{ + // Determine return value size(s). + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + emitAttr secondRetSize = EA_UNKNOWN; + + if (call->HasMultiRegRetVal()) + { + retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + } + else + { + assert(call->gtType != TYP_STRUCT); + + if (call->gtType == TYP_REF) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + DebugInfo di; + // We need to propagate the debug information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. + if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (call->gtCallType != CT_HELPER) + { + sigInfo = call->callSig; + } + + if (call->IsFastTailCall()) + { + regMaskTP trashedByEpilog = RBM_CALLEE_SAVED; + + // The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no + // non-standard args that may be trash if this is a tailcall. 
+ if (compiler->getNeedsGSSecurityCookie()) + { + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); + } + + for (CallArg& arg : call->gtArgs.Args()) + { + for (unsigned j = 0; j < arg.AbiInfo.NumRegs; j++) + { + regNumber reg = arg.AbiInfo.GetRegNum(j); + if ((trashedByEpilog & genRegMask(reg)) != 0) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } + } + } + } +#endif // DEBUG + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = getCallTarget(call, &methHnd); + + if (target != nullptr) + { + // A call target can not be a contained indirection + assert(!target->isContainedIndir()); + + // For fast tailcall we have already consumed the target. We ensure in + // RA that the target was allocated into a volatile register that will + // not be messed up by epilog sequence. + if (!call->IsFastTailCall()) + { + genConsumeReg(target); + } + + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. + // + assert(genIsValidIntReg(target->GetRegNum())); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + target->GetRegNum(), + call->IsFastTailCall()); + // clang-format on + } + else + { + // If we have no target and this is a call with indirection cell then + // we do an optimization where we load the call address directly from + // the indirection cell instead of duplicating the tree. In BuildCall + // we ensure that get an extra register for the purpose. Note that for + // CFG the call might have changed to + // CORINFO_HELP_DISPATCH_INDIRECT_CALL in which case we still have the + // indirection cell but we should not try to optimize. + regNumber callThroughIndirReg = REG_NA; + if (!call->IsHelperCall(compiler, CORINFO_HELP_DISPATCH_INDIRECT_CALL)) + { + callThroughIndirReg = getCallIndirectionCellReg(call); + } + + if (callThroughIndirReg != REG_NA) + { + assert(call->IsR2ROrVirtualStubRelativeIndir()); + regNumber targetAddrReg = call->GetSingleTempReg(); + // For fast tailcalls we have already loaded the call target when processing the call node. + if (!call->IsFastTailCall()) + { + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), targetAddrReg, + callThroughIndirReg); + } + else + { + // Register where we save call address in should not be overridden by epilog. + assert((targetAddrReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == targetAddrReg); + } + + // We have now generated code loading the target address from the indirection cell into `targetAddrReg`. + // We just need to emit "bl targetAddrReg" in this case. 
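+                // ("bl" is arm64 terminology carried over; on RISC-V this ends up as an
+                // indirect jalr through targetAddrReg.)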
+ // + assert(genIsValidIntReg(targetAddrReg)); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + targetAddrReg, + call->IsFastTailCall()); + // clang-format on + } + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(call->gtCallType == CT_HELPER || call->gtCallType == CT_USER_FUNC); + + void* addr = nullptr; +#ifdef FEATURE_READYTORUN + if (call->gtEntryPoint.addr != NULL) + { + assert(call->gtEntryPoint.accessType == IAT_VALUE); + addr = call->gtEntryPoint.addr; + } + else +#endif // FEATURE_READYTORUN + if (call->gtCallType == CT_HELPER) + { + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + assert(pAddr == nullptr); + } + else + { + // Direct call to a non-virtual user function. + addr = call->gtDirectCallAddress; + } + + assert(addr != nullptr); + + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + addr, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + REG_NA, + call->IsFastTailCall()); + // clang-format on + } + } +} + +// Produce code for a GT_JMP node. +// The arguments of the caller needs to be transferred to the callee before exiting caller. +// The actual jump to callee is generated as part of caller epilog sequence. +// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. +void CodeGen::genJmpMethod(GenTree* jmp) +{ + NYI("unimplemented on RISCV64 yet"); +} + +//------------------------------------------------------------------------ +// genIntCastOverflowCheck: Generate overflow checking code for an integer cast. +// +// Arguments: +// cast - The GT_CAST node +// desc - The cast description +// reg - The register containing the value to check +// +void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg) +{ + switch (desc.CheckKind()) + { + case GenIntCastDesc::CHECK_POSITIVE: + { + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, REG_R0); + } + break; + + case GenIntCastDesc::CHECK_UINT_RANGE: + { + // We need to check if the value is not greater than 0xFFFFFFFF + // if the upper 32 bits are zero. + ssize_t imm = -1; + GetEmitter()->emitIns_R_R_I(INS_addi, EA_8BYTE, REG_RA, REG_R0, imm); + + GetEmitter()->emitIns_R_R_I(INS_slli, EA_8BYTE, REG_RA, REG_RA, 32); + GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_RA, reg, REG_RA); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_RA); + } + break; + + case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE: + { + // We need to check if the value is not greater than 0x7FFFFFFF + // if the upper 33 bits are zero. 
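+            // Sketch of the check below: materialize the upper-33-bit mask (addi -1, then
+            // slli 31), AND it with the value, and branch to the overflow throw block if
+            // anything is left.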
+ // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_RA, 0xFFFFFFFF80000000LL); + ssize_t imm = -1; + GetEmitter()->emitIns_R_R_I(INS_addi, EA_8BYTE, REG_RA, REG_R0, imm); + + GetEmitter()->emitIns_R_R_I(INS_slli, EA_8BYTE, REG_RA, REG_RA, 31); + + GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_RA, reg, REG_RA); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_RA); + } + break; + + case GenIntCastDesc::CHECK_INT_RANGE: + { + const regNumber tempReg = rsGetRsvdReg(); + assert(tempReg != reg); + GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MAX); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, tempReg, nullptr, reg); + + GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MIN); + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, tempReg); + } + break; + + default: + { + assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); + const int castMaxValue = desc.CheckSmallIntMax(); + const int castMinValue = desc.CheckSmallIntMin(); + instruction ins; + + if (castMaxValue > 2047) + { + assert((castMaxValue == 32767) || (castMaxValue == 65535)); + GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_RA, castMaxValue + 1); + ins = castMinValue == 0 ? INS_bgeu : INS_bge; + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_RA); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_addiw, EA_ATTR(desc.CheckSrcSize()), REG_RA, REG_R0, castMaxValue); + ins = castMinValue == 0 ? INS_bltu : INS_blt; + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_RA, nullptr, reg); + } + + if (castMinValue != 0) + { + if (emitter::isValidSimm12(castMinValue)) + { + GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_RA, reg, castMinValue); + } + else + { + GetEmitter()->emitIns_I_la(EA_8BYTE, REG_RA, castMinValue); + GetEmitter()->emitIns_R_R_R(INS_slt, EA_ATTR(desc.CheckSrcSize()), REG_RA, reg, REG_RA); + } + genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_RA); + } + } + break; + } +} + +void CodeGen::genIntToIntCast(GenTreeCast* cast) +{ + genConsumeRegs(cast->gtGetOp1()); + + emitter* emit = GetEmitter(); + var_types dstType = cast->CastToType(); + var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); + const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + const regNumber dstReg = cast->GetRegNum(); + const unsigned char size = 32; + + assert(genIsValidIntReg(srcReg)); + assert(genIsValidIntReg(dstReg)); + + GenIntCastDesc desc(cast); + + if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) + { + genIntCastOverflowCheck(cast, desc, srcReg); + } + + if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) + { + instruction ins; + + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + if (desc.ExtendSrcSize() == 1) + { + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 8); + emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 64 - 8); + } + else + { + + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 16); + emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 64 - 16); + } + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + if (desc.ExtendSrcSize() == 1) + { + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 8); + emit->emitIns_R_R_I(INS_srai, EA_PTRSIZE, dstReg, dstReg, 64 - 8); + } + else + { + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 64 - 16); + emit->emitIns_R_R_I(INS_srai, EA_PTRSIZE, dstReg, dstReg, 64 - 16); + } + break; + + case GenIntCastDesc::ZERO_EXTEND_INT: + + emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 
32);
+                emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 32);
+                break;
+            case GenIntCastDesc::SIGN_EXTEND_INT:
+                emit->emitIns_R_R_I(INS_slliw, EA_4BYTE, dstReg, srcReg, 0);
+                break;
+
+            default:
+                assert(desc.ExtendKind() == GenIntCastDesc::COPY);
+                if (srcType == TYP_INT)
+                {
+                    emit->emitIns_R_R_I(INS_slliw, EA_4BYTE, dstReg, srcReg, 0);
+                }
+                else
+                {
+                    emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, dstReg, srcReg, 0);
+                }
+                break;
+        }
+    }
+
+    genProduceReg(cast);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    Cast is a non-overflow conversion.
+//    The treeNode must have an assigned register.
+//    The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTree* treeNode)
+{
+    // float <--> double conversions are always non-overflow ones
+    assert(treeNode->OperGet() == GT_CAST);
+    assert(!treeNode->gtOverflow());
+
+    regNumber targetReg = treeNode->GetRegNum();
+    assert(genIsValidFloatReg(targetReg));
+
+    GenTree* op1 = treeNode->AsOp()->gtOp1;
+    assert(!op1->isContained());                  // Cannot be contained
+    assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg.
+
+    var_types dstType = treeNode->CastToType();
+    var_types srcType = op1->TypeGet();
+    assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+    genConsumeOperands(treeNode->AsOp());
+
+    // treeNode must be a reg
+    assert(!treeNode->isContained());
+
+    if (srcType != dstType)
+    {
+        instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_s  // convert Single to Double
+                                                 : INS_fcvt_s_d; // convert Double to Single
+
+        GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum());
+    }
+    else if (treeNode->GetRegNum() != op1->GetRegNum())
+    {
+        // Same-type cast (double-to-double or float-to-float): just emit a move.
+        instruction ins = (srcType == TYP_FLOAT) ? INS_fsgnj_s : INS_fsgnj_d;
+        GetEmitter()->emitIns_R_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum(),
+                                    op1->GetRegNum());
+    }
+
+    genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+//
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+                                      unsigned prologSize,
+                                      unsigned epilogSize DEBUGARG(void* codePtr))
+{
+    IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
+    GcInfoEncoder* gcInfoEncoder  = new (compiler, CMK_GC)
+        GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+    assert(gcInfoEncoder != nullptr);
+
+    // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+    gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+    // We keep the call count for the second call to gcMakeRegPtrTable() below.
+    unsigned callCnt = 0;
+
+    // First we figure out the encoder ID's for the stack slots and registers.
+    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
+
+    // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+    gcInfoEncoder->FinalizeSlotIds();
+
+    // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); + + if (compiler->opts.compDbgEnC) + { + // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) + // which is: + // -return address + // -saved off RBP + // -saved 'this' pointer and bool for synchronized methods + + // 4 slots for RBP + return address + RSI + RDI + int preservedAreaSize = 4 * REGSIZE_BYTES; + + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { + preservedAreaSize += REGSIZE_BYTES; + } + + preservedAreaSize += 1; // bool for synchronized methods + } + + // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the + // frame + gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); + } + + if (compiler->opts.IsReversePInvoke()) + { + unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; + assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); + gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); + } + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} + +//------------------------------------------------------------------------ +// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) +{ + assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK)); + + if (blkOp->OperIs(GT_STORE_OBJ)) + { + assert(!blkOp->gtBlkOpGcUnsafe); + assert(blkOp->OperIsCopyBlkOp()); + assert(blkOp->AsObj()->GetLayout()->HasGCPtr()); + genCodeForCpObj(blkOp->AsObj()); + return; + } + if (blkOp->gtBlkOpGcUnsafe) + { + GetEmitter()->emitDisableGC(); + } + bool isCopyBlk = blkOp->OperIsCopyBlkOp(); + + switch (blkOp->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindHelper: + if (isCopyBlk) + { + genCodeForCpBlkHelper(blkOp); + } + else + { + genCodeForInitBlkHelper(blkOp); + } + break; + + case GenTreeBlk::BlkOpKindUnroll: + if (isCopyBlk) + { + genCodeForCpBlkUnroll(blkOp); + } + else + { + genCodeForInitBlkUnroll(blkOp); + } + break; + + default: + unreached(); + } + + if (blkOp->gtBlkOpGcUnsafe) + { + GetEmitter()->emitEnableGC(); + } +} + +//------------------------------------------------------------------------ +// genLeaInstruction: Produce code for a GT_LEA node. +// +// Arguments: +// lea - the node +// +void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) +{ + genConsumeOperands(lea); + emitter* emit = GetEmitter(); + emitAttr size = emitTypeSize(lea); + int offset = lea->Offset(); + + // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate: + // tmpReg = indexReg << scale; + // destReg = baseReg + tmpReg; + // destReg = destReg + offset; + // + // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture + // addressing mode instruction. 
Currently we're 'cheating' by producing one or more + // instructions to generate the addressing mode so we need to modify lowering to + // produce LEAs that are a 1:1 relationship to the LOONGARCH64 architecture. + if (lea->Base() && lea->Index()) + { + GenTree* memBase = lea->Base(); + GenTree* index = lea->Index(); + + DWORD scale; + + assert(isPow2(lea->gtScale)); + BitScanForward(&scale, lea->gtScale); + assert(scale <= 4); + + if (offset == 0) + { + // Then compute target reg from [base + index*scale] + genScaledAdd(size, lea->GetRegNum(), memBase->GetRegNum(), index->GetRegNum(), scale); + } + else + { + // When generating fully interruptible code we have to use the "large offset" sequence + // when calculating a EA_BYREF as we can't report a byref that points outside of the object + bool useLargeOffsetSeq = compiler->GetInterruptible() && (size == EA_BYREF); + + if (!useLargeOffsetSeq && emitter::isValidSimm12(offset)) + { + genScaledAdd(size, lea->GetRegNum(), memBase->GetRegNum(), index->GetRegNum(), scale); + instruction ins = size == EA_4BYTE ? INS_addiw : INS_addi; + emit->emitIns_R_R_I(ins, size, lea->GetRegNum(), lea->GetRegNum(), offset); + } + else + { + regNumber tmpReg = lea->GetSingleTempReg(); + + noway_assert(tmpReg != index->GetRegNum()); + noway_assert(tmpReg != memBase->GetRegNum()); + + // compute the large offset. + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + genScaledAdd(EA_PTRSIZE, tmpReg, tmpReg, index->GetRegNum(), scale); + + instruction ins = size == EA_4BYTE ? INS_addw : INS_add; + emit->emitIns_R_R_R(ins, size, lea->GetRegNum(), tmpReg, memBase->GetRegNum()); + } + } + } + else if (lea->Base()) + { + GenTree* memBase = lea->Base(); + + if (emitter::isValidSimm12(offset)) + { + if (offset != 0) + { + // Then compute target reg from [memBase + offset] + emit->emitIns_R_R_I(INS_addi, size, lea->GetRegNum(), memBase->GetRegNum(), offset); + } + else // offset is zero + { + if (lea->GetRegNum() != memBase->GetRegNum()) + { + emit->emitIns_R_R_I(INS_ori, size, lea->GetRegNum(), memBase->GetRegNum(), 0); + } + } + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = lea->GetSingleTempReg(); + + // First load tmpReg with the large offset constant + emit->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + + // Then compute target reg from [memBase + tmpReg] + emit->emitIns_R_R_R(INS_add, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + } + } + else if (lea->Index()) + { + // If we encounter a GT_LEA node without a base it means it came out + // when attempting to optimize an arbitrary arithmetic expression during lower. + // This is currently disabled in LOONGARCH64 since we need to adjust lower to account + // for the simpler instructions LOONGARCH64 supports. + // TODO-LOONGARCH64-CQ: Fix this and let LEA optimize arithmetic trees too. + assert(!"We shouldn't see a baseless address computation during CodeGen for LOONGARCH64"); + } + + genProduceReg(lea); +} + +//------------------------------------------------------------------------ +// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. +// +// Arguments: +// delta - the offset to add to the current stack pointer to establish the frame pointer +// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data. 
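+//
+// Notes:
+//    This amounts to a single "addi fp, sp, delta", so delta must fit in a signed 12-bit
+//    immediate (asserted below).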
+ +void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) +{ + assert(compiler->compGeneratingProlog); + + assert(emitter::isValidSimm12(delta)); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); + + if (reportUnwindData) + { + compiler->unwindSetFrameReg(REG_FPBASE, delta); + } +; +} + +//------------------------------------------------------------------------ +// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP. +// +// Notes: +// On LOONGARCH64, this only does the probing; allocating the frame is done when callee-saved registers are saved. +// This is done before anything has been pushed. The previous frame might have a large outgoing argument +// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might +// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, +// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard +// page by default, so we need to be more careful. We do an extra probe if we might not have probed +// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this +// on Windows as well just to be consistent, even though it should not be necessary. +// +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +{ + NYI("unimplemented on RISCV64 yet"); +} + +inline void CodeGen::genJumpToThrowHlpBlk_la( + SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) +{ + assert(INS_beq <= ins && ins <= INS_bgeu); + + bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks(); + + emitter* emit = GetEmitter(); + if (useThrowHlpBlk) + { + // For code with throw helper blocks, find and use the helper block for + // raising the exception. The block may be shared by other trees too. + + BasicBlock* excpRaisingBlock; + + if (failBlk != nullptr) + { + // We already know which block to jump to. Use that. + excpRaisingBlock = failBlk; + +#ifdef DEBUG + Compiler::AddCodeDsc* add = + compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); + assert(excpRaisingBlock == add->acdDstBlk); +#if !FEATURE_FIXED_OUT_ARGS + assert(add->acdStkLvlInit || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS +#endif // DEBUG + } + else + { + // Find the helper-block which raises the exception. + Compiler::AddCodeDsc* add = + compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); + PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block")); + excpRaisingBlock = add->acdDstBlk; +#if !FEATURE_FIXED_OUT_ARGS + assert(add->acdStkLvlInit || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS + } + + noway_assert(excpRaisingBlock != nullptr); + + // Jump to the exception-throwing block on error. + emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits; + } + else + { + // The code to throw the exception will be generated inline, and + // we will jump around it in the normal non-exception case. 
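+        // Rough shape of this path (labels illustrative):
+        //     b<reversed-cond>  reg1, reg2, skip
+        //     call              <throw helper>
+        //   skip: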
+ + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); + emitter::EmitCallType callType; + regNumber callTarget; + + // maybe optimize + // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); + if (ins == INS_blt) + { + ins = INS_bge; + } + else if (ins == INS_bltu) + { + ins = INS_bgeu; + } + else if (ins == INS_bge) + { + ins = INS_blt; + } + else if (ins == INS_bgeu) + { + ins = INS_bltu; + } + else + { + ins = ins == INS_beq ? INS_bne : INS_beq; + } + + if (addr == nullptr) + { + _ASSERTE(!"TODO RISCV64 NYI"); + callType = emitter::EC_INDIR_R; + callTarget = REG_DEFAULT_HELPER_CALL_TARGET; + + /* + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl. + + if (compiler->opts.compReloc) + { + ssize_t imm = (2 + 1) << 2; // , 1=jalr. + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + GetEmitter()->emitIns_R_AI(INS_jalr, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); // TODO NEED TO CHECK bl => jalr + } + else + { + ssize_t imm = (3 + 1) << 2; // , 1=jalr. + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, + ((ssize_t)pAddr & 0xfff) >> 2); + } + */ + } + else + { // INS_OPTS_C + callType = emitter::EC_FUNC_TOKEN; + callTarget = REG_NA; + + ssize_t imm = 9 << 2; + if (compiler->opts.compReloc) + { + imm = 3 << 2; + } + + emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); + } + + BasicBlock* skipLabel = genCreateTempLabel(); + + emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); + regSet.verifyRegistersUsed(killMask); + + // NOTE: here is just defining an `empty` label which will create a new IGroup for updating the gcInfo. + genDefineTempLabel(skipLabel); + } +} +//----------------------------------------------------------------------------------- +// instGen_MemoryBarrier: Emit a MemoryBarrier instruction +// +// Arguments: +// barrierKind - kind of barrier to emit (Only supports the Full now!! This depends on the CPU). +// +// Notes: +// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1 +// +void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) +{ +#ifdef DEBUG + if (JitConfig.JitNoMemoryBarriers() == 1) + { + return; + } +#endif // DEBUG + + // TODO-RISCV64: Use the exact barrier type depending on the CPU. + GetEmitter()->emitIns_I(INS_fence, EA_4BYTE, INS_BARRIER_FULL); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. 
Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) +{ + NYI("unimplemented on RISCV64 yet"); +} + +/*----------------------------------------------------------------------------- + * + * Push/Pop any callee-saved registers we have used + */ +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + +#if ETW_EBP_FRAMED + if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) + { + noway_assert(!"Used register RBM_FPBASE as a scratch register!"); + } +#endif + + // On LA we push the FP (frame-pointer) here along with all other callee saved registers + if (isFramePointerUsed()) + { + rsPushRegs |= RBM_FPBASE; + } + + // + // It may be possible to skip pushing/popping ra for leaf methods. However, such optimization would require + // changes in GC suspension architecture. + // + // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we + // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf + // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends + // on the return address to be saved on the stack. If we skipped pushing/popping ra, the return address would never + // be saved on the stack and the GC suspension would time out. + // + // So if we wanted to skip pushing/popping ra for leaf frames, we would also need to do one of + // the following to make GC suspension work in the above scenario: + // - Make return address hijacking work even when ra is not saved on the stack. + // - Generate fully interruptible code for loops that contains calls + // - Generate fully interruptible code for leaf methods + // + // Given the limited benefit from this optimization (<10k for SPCL NGen image), the extra complexity + // is not worth it. + // + + rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). + regSet.rsMaskCalleeSaved = rsPushRegs; + regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; + +#ifdef DEBUG + if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + { + printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); + dspRegMask(rsPushRegs); + printf("\n"); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + } +#endif // DEBUG + + int totalFrameSize = genTotalFrameSize(); + + int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. + +#ifdef DEBUG + if (verbose) + { + printf("Save float regs: "); + dspRegMask(maskSaveRegsFloat); + printf("\n"); + printf("Save int regs: "); + dspRegMask(maskSaveRegsInt); + printf("\n"); + } +#endif // DEBUG + + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we + // generate based on various sizes. + int frameType = 0; + + // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the + // first save instruction as a "predecrement" amount, if possible. 
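Concretely, the prolog code below distinguishes two frame shapes: if the whole frame fits the signed 12-bit immediate of addi/sd, it is allocated with a single SP adjustment and FP/RA are saved just above the outgoing argument area; otherwise the allocation is split across several SP adjustments. A standalone sketch of that decision, with illustrative names rather than the JIT's:

#include <cassert>
#include <cstdio>

// Standalone sketch (illustrative names, not JIT code) of the frame-shape decision
// made below: when the whole frame fits the signed 12-bit immediate of addi/sd,
// the prolog uses a single "addi sp, sp, -framesz" and saves FP/RA just above the
// outgoing argument area; otherwise it falls back to the split-allocation shape.
struct FrameShape
{
    int frameType;    // 1 = single SP adjustment, 2 = split allocation
    int fpSaveOffset; // offset of the saved FP from the new SP
    int raSaveOffset; // offset of the saved RA from the new SP
};

FrameShape chooseFrameShape(int totalFrameSize, int outgoingArgSpaceSize)
{
    FrameShape s = {};
    if (totalFrameSize < 2048) // -totalFrameSize and the sd/ld offsets all fit simm12
    {
        s.frameType    = 1;
        s.fpSaveOffset = outgoingArgSpaceSize;
        s.raSaveOffset = outgoingArgSpaceSize + 8;
    }
    else
    {
        s.frameType = 2; // offsets depend on the extra SP adjustments made below
    }
    return s;
}

int main()
{
    FrameShape s = chooseFrameShape(512, 32);
    assert(s.frameType == 1 && s.fpSaveOffset == 32 && s.raSaveOffset == 40);
    std::printf("frame type %d, fp saved at %d, ra saved at %d\n", s.frameType, s.fpSaveOffset, s.raSaveOffset);
    return 0;
}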
+ int calleeSaveSPDelta = 0; + + // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.) + bool establishFramePointer = true; + + // If we do establish the frame pointer, what is the amount we add to SP to do so? + unsigned offsetSpToSavedFp = 0; + + if (isFramePointerUsed()) + { + // We need to save both FP and RA. + + assert((maskSaveRegsInt & RBM_FP) != 0); + assert((maskSaveRegsInt & RBM_RA) != 0); + + // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address + // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses, + // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will + // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our + // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above + // the GS cookie. + // + // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to + // create a frame pointer chain. + // + + if (totalFrameSize < 2048) + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + + // Case #1. + // + // Generate: + // addi sp, sp, -framesz + // sd fp, outsz(sp) + // sd ra, outsz+8(sp) + // + // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. + // + // After saving callee-saved registers, we establish the frame pointer with: + // daddiu fp, sp, offset-fp + // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. + + JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); + + frameType = 1; + + offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); + compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); + compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); + + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + + offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA + } + else + { + JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); + + frameType = 2; + + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + } + } + else + { + // No frame pointer (no chaining). + assert((maskSaveRegsInt & RBM_FP) == 0); + assert((maskSaveRegsInt & RBM_RA) != 0); + + // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using + // 'sd' if we only have one callee-saved register plus RA to save. + + NYI("Frame without frame pointer"); + offset = 0; + } + + assert(frameType != 0); + + JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); + + // For varargs, home the incoming arg registers last. 
Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + if (compiler->info.compIsVarArgs) + { + JITDUMP(" compIsVarArgs=true\n"); + NYI_RISCV64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("DEBUG: RISCV64, frameType:%d\n\n", frameType); + } +#endif + if (frameType == 1) + { + // offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 2) + { + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offsetSpToSavedFp = offset; + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + + calleeSaveSPDelta = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + } + + establishFramePointer = false; + } + else + { + unreached(); + } + + if (establishFramePointer) + { + JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + } +} + +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) +{ + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) + { + rsRestoreRegs |= RBM_FPBASE; + } + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. + int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. + + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
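For orientation, the frame-type-1 epilog produced below is the mirror image of the prolog above. A standalone sketch with placeholder register names (the callee-saved restores and the final return are elided or simplified):

#include <cstdio>
#include <string>
#include <vector>

// Standalone sketch (placeholder register names, not JIT code) of the frame-type-1
// epilog shape emitted below: the mirror image of the prolog.  When localloc was
// used, SP is first recovered from FP, because the dynamic allocation makes the
// SP-relative frame size unknown at compile time.
std::vector<std::string> epilogType1(int frameSize, int outSize, bool locallocUsed, int spToFpDelta)
{
    std::vector<std::string> seq;
    if (locallocUsed)
    {
        seq.push_back("addi sp, fp, " + std::to_string(-spToFpDelta));
    }
    // ... the other callee-saved registers are restored here ...
    seq.push_back("ld   ra, " + std::to_string(outSize + 8) + "(sp)");
    seq.push_back("ld   fp, " + std::to_string(outSize) + "(sp)");
    seq.push_back("addi sp, sp, " + std::to_string(frameSize));
    // (the actual return is emitted elsewhere in the epilog)
    return seq;
}

int main()
{
    for (const std::string& s : epilogType1(512, 32, /* locallocUsed */ false, 40))
    {
        std::printf("%s\n", s.c_str());
    }
    return 0;
}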
+ + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + JITDUMP("Frame type 2(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 2; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(outSzAligned, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK R21 => T6 + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_T6, nullptr, /* reportUnwindData */ true); // TODO CHECK R21 =>T6 + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_T6, nullptr, + /* reportUnwindData */ true); // TODO CHECK R21 => T6 + } + } + } + else + { + // No frame pointer (no chaining). 
+ NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + // had done. + } + else + { + unreached(); + } +} + +void CodeGen::genFnPrologCalleeRegArgs() +{ + assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); + + regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnPrologCalleeRegArgs() RISCV64:0x%llx.\n", regArgMaskLive); + } +#endif + + // We should be generating the prolog block when we are called + assert(compiler->compGeneratingProlog); + + // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called. + noway_assert(regArgMaskLive != 0); + + unsigned varNum; + unsigned regArgMaskIsInt = 0; + unsigned regArgNum = 0; + // Process any circular dependencies + unsigned regArg[MAX_REG_ARG * 2] = {0}; + unsigned regArgInit[MAX_REG_ARG * 2] = {0}; + for (varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Is this variable a register arg? 
+ if (!varDsc->lvIsParam) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + continue; + } + + if (varDsc->lvIsInReg()) + { + assert(genIsValidIntReg(varDsc->GetArgReg()) || genIsValidFloatReg(varDsc->GetArgReg())); + assert(!(genIsValidIntReg(varDsc->GetOtherArgReg()) || genIsValidFloatReg(varDsc->GetOtherArgReg()))); + if (varDsc->GetArgInitReg() != varDsc->GetArgReg()) + { + if (genIsValidIntReg(varDsc->GetArgInitReg())) + { + if (varDsc->GetArgInitReg() > REG_ARG_LAST) + { + bool isSkip; + instruction ins; + emitAttr size; + if (genIsValidIntReg(varDsc->GetArgReg())) + { + ins = INS_mov; + if (varDsc->TypeGet() == TYP_INT) + { + size = EA_4BYTE; + isSkip = false; + } + else + { + size = EA_PTRSIZE; + isSkip = true; + } + } + else + { + ins = INS_fmv_x_d; + size = EA_PTRSIZE; + isSkip = true; + } + GetEmitter()->emitIns_Mov(ins, size, varDsc->GetArgInitReg(), varDsc->GetArgReg(), isSkip); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + if (genIsValidIntReg(varDsc->GetArgReg())) + { + assert(varDsc->GetArgReg() >= REG_ARG_FIRST && varDsc->GetArgReg() <= REG_ARG_LAST); + regArg[varDsc->GetArgReg() - REG_ARG_FIRST] = varDsc->GetArgReg(); + regArgInit[varDsc->GetArgReg() - REG_ARG_FIRST] = varDsc->GetArgInitReg(); + if (varDsc->TypeGet() == TYP_INT) + { + regArgMaskIsInt = 1 << (unsigned)varDsc->GetArgReg(); + } + } + else + { + assert(genIsValidFloatReg(varDsc->GetArgReg())); + assert(varDsc->GetArgReg() >= REG_ARG_FP_FIRST && varDsc->GetArgReg() <= REG_ARG_FP_LAST); + regArg[(varDsc->GetArgReg() - REG_ARG_FP_FIRST) | 0x8] = varDsc->GetArgReg(); + regArgInit[(varDsc->GetArgReg() - REG_ARG_FP_FIRST) | 0x8] = varDsc->GetArgInitReg(); + } + regArgNum++; + } + } + else + { + assert(genIsValidFloatReg(varDsc->GetArgInitReg())); + if (genIsValidIntReg(varDsc->GetArgReg())) + { + GetEmitter()->emitIns_Mov(INS_fmv_d_x, EA_PTRSIZE, varDsc->GetArgInitReg(), + varDsc->GetArgReg(), false); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else if (varDsc->GetArgInitReg() > REG_ARG_FP_LAST) + { + GetEmitter()->emitIns_Mov(INS_fsgnj_d, EA_PTRSIZE, varDsc->GetArgInitReg(), varDsc->GetArgReg(), + true); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + assert(genIsValidFloatReg(varDsc->GetArgReg())); + regArg[(varDsc->GetArgReg() & 7) | 0x8] = varDsc->GetArgReg(); + regArgInit[(varDsc->GetArgReg() & 7) | 0x8] = varDsc->GetArgInitReg(); + regArgNum++; + } + } + } + else + { + // TODO for RISCV64: should delete this by optimization "struct {long a; int32_t b;};" + // liking AMD64_ABI within morph. + if (genIsValidIntReg(varDsc->GetArgReg()) && (varDsc->TypeGet() == TYP_INT)) + { + GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + } + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } +#ifdef USING_SCOPE_INFO + psiMoveToReg(varNum); +#endif // USING_SCOPE_INFO + if (!varDsc->lvLiveInOutOfHndlr) + { + continue; + } + } + + // When we have a promoted struct we have two possible LclVars that can represent the incoming argument + // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. + // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise + // use the original TYP_STRUCT argument. 
+ // + if (varDsc->lvPromoted || varDsc->lvIsStructField) + { + LclVarDsc* parentVarDsc = varDsc; + if (varDsc->lvIsStructField) + { + assert(!varDsc->lvPromoted); + parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl]; + } + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc); + + if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + // For register arguments that are independent promoted structs we put the promoted field varNum + // in the regArgTab[] + if (varDsc->lvPromoted) + { + continue; + } + } + else + { + // For register arguments that are not independent promoted structs we put the parent struct varNum + // in the regArgTab[] + if (varDsc->lvIsStructField) + { + continue; + } + } + } + + var_types storeType = TYP_UNDEF; + int slotSize = TARGET_POINTER_SIZE; + + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetArgReg())) + { + storeType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + } + else + { + assert(emitter::isGeneralRegister(varDsc->GetArgReg())); + if (varDsc->lvIs4Field1) + { + storeType = TYP_INT; + } + else + { + storeType = varDsc->GetLayout()->GetGCPtrType(0); + } + } + slotSize = (int)EA_SIZE(emitActualTypeSize(storeType)); + +#if FEATURE_MULTIREG_ARGS + // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers + noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); +#endif + } + else // Not a struct type + { + storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + if (emitter::isFloatReg(varDsc->GetArgReg()) != varTypeIsFloating(storeType)) + { + assert(varTypeIsFloating(storeType)); + storeType = storeType == TYP_DOUBLE ? TYP_I_IMPL : TYP_INT; + } + } + emitAttr size = emitActualTypeSize(storeType); + + regNumber srcRegNum = varDsc->GetArgReg(); + + // Stack argument - if the ref count is 0 don't care about it + if (!varDsc->lvOnFrame) + { + noway_assert(varDsc->lvRefCnt() == 0); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (varDsc->GetOtherArgReg() < REG_STK) + { + regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } + } + else + { + assert(srcRegNum != varDsc->GetOtherArgReg()); + + regNumber tmp_reg = REG_NA; + + bool FPbased; + int baseOffset = compiler->lvaFrameAddress(varNum, &FPbased); + + // First store the `varDsc->GetArgReg()` on stack. + if (emitter::isValidSimm12(baseOffset)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, REG_NA, varNum, 0); + } + else + { + assert(tmp_reg == REG_NA); + + tmp_reg = REG_T6; // TODO CHECK LATER + GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmp_reg, baseOffset); + // The last parameter `int offs` of the `emitIns_S_R` is negtive, + // it means the offset imm had been stored within the `REG_T6`. + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, tmp_reg, varNum, -8); + } + + regArgMaskLive &= ~genRegMask(srcRegNum); + + // Then check if varDsc is a struct arg + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetOtherArgReg())) + { + srcRegNum = varDsc->GetOtherArgReg(); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); + + slotSize = slotSize < (int)size ? 
(int)size : slotSize; + } + else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) + { + if (varDsc->lvIs4Field2) + { + storeType = TYP_INT; + } + else + { + storeType = varDsc->GetLayout()->GetGCPtrType(1); + } + + srcRegNum = varDsc->GetOtherArgReg(); + size = emitActualTypeSize(storeType); + + slotSize = slotSize < (int)EA_SIZE(size) ? (int)EA_SIZE(size) : slotSize; + } + baseOffset += slotSize; + + // if the struct passed by two register, then store the second register `varDsc->GetOtherArgReg()`. + if (srcRegNum == varDsc->GetOtherArgReg()) + { + if (emitter::isValidSimm12(baseOffset)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, REG_NA, varNum, slotSize); + } + else + { + if (tmp_reg == REG_NA) + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_T6, baseOffset); // TODO REG21 => REGT6 + // The last parameter `int offs` of the `emitIns_S_R` is negtive, + // it means the offset imm had been stored within the `REG_T6`. + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, REG_T6, varNum, + -slotSize - 8); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_T6, REG_T6, slotSize); // TODO REG21 => T6 + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, REG_T6, varNum, + -slotSize - 8); // TODO REG21 => T6 + } + } + regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! + } + else if (varDsc->lvIsSplit) + { + // the struct is a split struct. + assert(varDsc->GetArgReg() == REG_ARG_LAST && varDsc->GetOtherArgReg() == REG_STK); + + // For the LA's ABI, the split struct arg will be passed via `A7` and a stack slot on caller. + // But if the `A7` is stored on stack on the callee side, the whole split struct should be + // stored continuous on the stack on the callee side. + // So, after we save `A7` on the stack in prolog, it has to copy the stack slot of the split struct + // which was passed by the caller. Here we load the stack slot to `REG_SCRATCH`, and save it + // on the stack following the `A7` in prolog. + if (emitter::isValidSimm12(genTotalFrameSize())) + { + GetEmitter()->emitIns_R_R_I(INS_ld, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + } + else + { + assert(!EA_IS_RELOC(size)); + GetEmitter()->emitIns_I_la(size, REG_SCRATCH, genTotalFrameSize()); + GetEmitter()->emitIns_R_R_R(INS_add, size, REG_SCRATCH, REG_SCRATCH, REG_SPBASE); + GetEmitter()->emitIns_R_R_I(INS_ld, size, REG_SCRATCH, REG_SCRATCH, 0); + } + + if (emitter::isValidSimm12(baseOffset)) + { + GetEmitter()->emitIns_S_R(INS_sd, size, REG_SCRATCH, REG_NA, varNum, TARGET_POINTER_SIZE); + } + else + { + if (tmp_reg == REG_NA) + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_T6, baseOffset); // TODO REG21 => T6 + // The last parameter `int offs` of the `emitIns_S_R` is negtive, + // it means the offset imm had been stored within the `REG_T6`. 
+ GetEmitter()->emitIns_S_R(INS_sd, size, REG_SCRATCH, REG_T6, varNum, -8); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_T6, REG_T6, TARGET_POINTER_SIZE); + GetEmitter()->emitIns_S_R(INS_sd, size, REG_SCRATCH, REG_T6, varNum, -slotSize - 8); + } + } + } + } + +#ifdef USING_SCOPE_INFO + { + psiMoveToStack(varNum); + } +#endif // USING_SCOPE_INFO + } + } + + if (regArgNum > 0) + { + instruction ins; + for (int i = MAX_REG_ARG - 1; i >= 0; i--) + { + if (regArg[i] > 0) + { + assert(genIsValidIntReg((regNumber)regArg[i])); + assert(genIsValidIntReg((regNumber)regArgInit[i])); + + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[i]); + if ((regArgMaskIsInt & (1 << regArg[i])) != 0) + { + ins = INS_slliw; + } + else + { + ins = INS_ori; + } + + if (regArgNum == 0) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + break; + } + else if (regArgInit[i] > regArg[i] || + (regArgInit[i] >= REG_T0 && regArgInit[i] <= REG_S1)) // TODO NEED TO VERIFY + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + } + else + { + assert(i > 0); + assert(regArgNum > 0); + + int j = regArgInit[i] - REG_ARG_FIRST; + assert((j >= 0) && (j < MAX_REG_ARG)); + if (regArg[j] == 0) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + } + else + { + int k = regArgInit[j] - REG_ARG_FIRST; + // assert((k >= 0) && (k < MAX_REG_ARG)); + instruction ins2 = (regArgMaskIsInt & (1 << regArg[j])) != 0 ? INS_slliw : INS_ori; + if ((regArg[k] == 0) || (k > i) || k < 0) + { + GetEmitter()->emitIns_R_R_I(ins2, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j], 0); + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], + 0); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + if (regArgNum == 0) + { + break; + } + } + else if (k == i) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, REG_T6, (regNumber)regArg[i], 0); // TODO REG21 => T6 + GetEmitter()->emitIns_R_R_I(ins2, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j], 0); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, (regNumber)regArgInit[i], REG_T6, 0); // TODO REG21 => T6 + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + regArg[j] = 0; + if (regArgNum == 0) + { + break; + } + } + else + { + NYI_RISCV64("-----------CodeGen::genFnPrologCalleeRegArgs() error!--"); + } + } + } + } + } + + if (regArgNum > 0) + { + for (int i = MAX_REG_ARG + MAX_FLOAT_REG_ARG - 1; i >= MAX_REG_ARG; i--) + { + if (regArg[i] > 0) + { + assert(genIsValidFloatReg((regNumber)regArg[i])); + + instruction ins = genIsValidIntReg((regNumber)regArgInit[i]) ? INS_fmv_x_d : INS_fsgnj_d; + + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[i]); + if (regArgNum == 0) + { + GetEmitter()->emitIns_Mov(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], + true); + break; + } + else if (regArgInit[i] > regArg[i] || (regArgInit[i] <= REG_F9)) // TODO NEED TO VERIFY + { + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, EA_PTRSIZE, (regNumber)regArgInit[i], + (regNumber)regArg[i], (regNumber)regArg[i]); + } + else + { + assert(i > MAX_REG_ARG); + assert(regArgNum > 0); + + int j = genIsValidIntReg((regNumber)regArgInit[i]) ? 
(regArgInit[i] - REG_ARG_FIRST) + : ((((int)regArgInit[i]) - REG_ARG_FP_FIRST) + 0x8); + if (j < MAX_REG_ARG || regArg[j] == 0) + { + GetEmitter()->emitIns_Mov(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], + true); + } + else + { + // NOTE: Not support the int-register case. + assert(genIsValidFloatReg((regNumber)regArg[j])); + assert(genIsValidFloatReg((regNumber)regArgInit[j])); + + int k = (((int)regArgInit[j]) - REG_ARG_FP_FIRST) + 0x8; + if ((regArg[k] == 0) || (k > i) || (k < MAX_REG_ARG)) + { + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j], (regNumber)regArg[j]); + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, EA_PTRSIZE, (regNumber)regArgInit[i], + (regNumber)regArg[i], (regNumber)regArg[i]); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + if (regArgNum == 0) + { + break; + } + } + else if (k == i) + { + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, EA_PTRSIZE, REG_SCRATCH_FLT, + (regNumber)regArg[i], (regNumber)regArg[i]); + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j], (regNumber)regArg[j]); + GetEmitter()->emitIns_R_R_R(INS_fsgnj_d, EA_PTRSIZE, (regNumber)regArgInit[i], + REG_SCRATCH_FLT, REG_SCRATCH_FLT); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + regArg[j] = 0; + if (regArgNum == 0) + { + break; + } + } + else + { + NYI_RISCV64("-----------CodeGen::genFnPrologCalleeRegArgs() error!--"); + } + } + } + } + } + } + assert(regArgNum == 0); + } + + assert(!regArgMaskLive); +} + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// set to non-zero value after this call. 
+// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + // TODO RISCV64 +} + +// return size +// alignmentWB is out param +unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB) +{ + NYI("unimplemented on RISCV64 yet"); + return 0; +} + +// return size +// alignmentWB is out param +unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB) +{ + NYI("unimplemented on RISCV64 yet"); + return 0; +} + +#endif // TARGET_RISCV64 diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2856cc56d6b1ef..90dea15d11cadc 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -535,12 +535,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_SHORT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case 3: useType = TYP_INT; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 || TARGET_RISCV64 #ifdef TARGET_64BIT case 4: @@ -548,14 +548,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_INT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case 5: case 6: case 7: useType = TYP_I_IMPL; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 || TARGET_RISCV64 #endif // TARGET_64BIT case TARGET_POINTER_SIZE: @@ -747,7 +747,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } -#elif defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Otherwise we pass this struct by value on the stack // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI @@ -775,7 +775,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -900,7 +900,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; } -#elif TARGET_LOONGARCH64 +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (structSize <= (TARGET_POINTER_SIZE * 2)) { uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); @@ -1058,7 +1058,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; -#elif defined(TARGET_LOONGARCH64) 
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // On LOONGARCH64 struct that is 1-16 bytes is returned by value in one/two register(s) howToReturnStruct = SPK_ByValue; @@ -2242,7 +2242,9 @@ void Compiler::compSetProcessor() #elif defined(TARGET_LOONGARCH64) info.genCPU = CPU_LOONGARCH64; +#elif defined(TARGET_RISCV64) + info.genCPU = CPU_RISCV64; #endif // @@ -3190,10 +3192,10 @@ void Compiler::compInitOptions(JitFlags* jitFlags) } #endif // FEATURE_CFI_SUPPORT -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Hot/cold splitting is not being tested on LoongArch64. opts.compProcedureSplitting = false; -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 #ifdef DEBUG opts.compProcedureSplittingEH = opts.compProcedureSplitting; @@ -3950,7 +3952,7 @@ void Compiler::compSetOptimizationLevel() fgCanRelocateEHRegions = true; } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Function compRsvdRegCheck: // given a curState to use for calculating the total frame size // it will return true if the REG_OPT_RSVD should be reserved so @@ -3999,6 +4001,10 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) JITDUMP(" Returning true (LOONGARCH64)\n\n"); return true; // just always assume we'll need it, for now +#elif defined(TARGET_RISCV64) + JITDUMP(" Returning true (RISCV64)\n\n"); + return true; // just always assume we'll need it, for now + #else // TARGET_ARM // frame layout: @@ -4122,7 +4128,7 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) return false; #endif // TARGET_ARM } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 //------------------------------------------------------------------------ // compGetTieringName: get a string describing tiered compilation settings diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index dd4b883679f8e7..5cbb7d034842a1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -548,6 +548,13 @@ class LclVarDsc unsigned char lvIsSplit : 1; // Set if the argument is splited. #endif // defined(TARGET_LOONGARCH64) +#if defined(TARGET_RISCV64) + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for RISCV64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for RISCV64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. +#endif // defined(TARGET_RISCV64) + + unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def // before lvaMarkLocalVars: identifies ref type locals that can get type updates @@ -1786,7 +1793,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) UnwindInfo uwi; // Unwind information for this function/funclet's hot section UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section @@ -1801,7 +1808,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. 
emitLocation* coldEndLoc; -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 #if defined(FEATURE_CFI_SUPPORT) jitstd::vector* cfiCodes; @@ -7839,6 +7846,9 @@ class Compiler #elif defined(TARGET_LOONGARCH64) reg = REG_T8; regMask = RBM_T8; +#elif defined(TARGET_RISCV64) + reg = REG_T5; + regMask = RBM_T5; #else #error Unsupported or unset target architecture #endif @@ -8196,6 +8206,16 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void unwindReturn(regNumber reg); #endif // defined(TARGET_LOONGARCH64) +#if defined(TARGET_RISCV64) + void unwindNop(); + void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last + // instruction and the current location. + void unwindSaveReg(regNumber reg, int offset); + void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); + void unwindReturn(regNumber reg); +#endif // defined(TARGET_RISCV64) + + // // Private "helper" functions for the unwind implementation. // @@ -9881,6 +9901,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define CPU_LOONGARCH64 0x0800 // The generic LOONGARCH64 CPU +#define CPU_RISCV64 0x1000 // The generic RISCV64 CPU + unsigned genCPU; // What CPU are we running on // Number of class profile probes in this method @@ -10358,7 +10380,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void compSetProcessor(); void compInitDebuggingInfo(); void compSetOptimizationLevel(); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool compRsvdRegCheck(FrameLayoutState curState); #endif void compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags); @@ -11676,6 +11698,10 @@ const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. const instruction INS_SQRT = INS_fsqrt_d; // NOTE: default is double. #endif // TARGET_LOONGARCH64 +#ifdef TARGET_RISCV64 +const instruction INS_BREAKPOINT = INS_ebreak; +#endif // TARGET_RISCV64 + /*****************************************************************************/ extern const BYTE genTypeSizes[]; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 94dcf70963a413..d51574ce086a1f 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -606,7 +606,7 @@ inline bool isRegParamType(var_types type) #endif // !TARGET_X86 } -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) /*****************************************************************************/ // Returns true if 'type' is a struct that can be enregistered for call args // or can be returned by value in multiple registers. 
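For RISCV64 the predicate named above reduces, in terms of size alone, to the same rule the earlier compiler.cpp hunks spell out: structs wider than one pointer but no wider than two can be passed or returned in a register pair, and anything larger goes by reference to a copy. A standalone sketch, with an illustrative pointer size rather than the JIT's TARGET_POINTER_SIZE:

#include <cstddef>

// Standalone sketch of the size rule behind the predicate named above, as it applies
// to RISCV64 (and LoongArch64): structs wider than one pointer but no wider than two
// can still be passed or returned in a register pair; anything larger is passed by
// reference to a copy.  The pointer size is illustrative, not the JIT's
// TARGET_POINTER_SIZE, and the real predicate also looks at the struct's field layout.
constexpr std::size_t kPointerSize = 8;

constexpr bool canEnregisterMultiByteStruct(std::size_t structSize)
{
    return (structSize > kPointerSize) && (structSize <= 2 * kPointerSize);
}

static_assert(canEnregisterMultiByteStruct(12), "a 12-byte struct can be split across two registers");
static_assert(!canEnregisterMultiByteStruct(24), "a 24-byte struct is passed by reference to a copy");

int main()
{
    return 0;
}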
@@ -664,7 +664,7 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types typ return result; } -#endif // TARGET_AMD64 || TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_AMD64 || TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 /*****************************************************************************/ @@ -3046,6 +3046,8 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) return regNum - REG_F0; #elif defined(TARGET_LOONGARCH64) return regNum - REG_F0; +#elif defined(TARGET_RISCV64) + return regNum - REG_FLTARG_0; #elif defined(TARGET_ARM64) return regNum - REG_V0; #elif defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index c9528a9585ebd5..574f32d4bff9b0 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -446,7 +446,12 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* } // otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) - NYI("unknown target"); + // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference + if (structSize > MAX_PASS_MULTIREG_BYTES) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } #endif // defined(TARGET_XXX) #endif // FEATURE_MULTIREG_ARGS diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 1749c6cc20afc0..1e1d3221723807 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1204,7 +1204,7 @@ void emitter::emitBegFN(bool hasFramePtr emitFirstColdIG = nullptr; emitTotalCodeSize = 0; -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) emitCounts_INS_OPTS_J = 0; #endif @@ -1398,6 +1398,12 @@ void emitter::dispIns(instrDesc* id) // For LoongArch64 using the emitDisInsName(). NYI_LOONGARCH64("Not used on LOONGARCH64."); } +#elif defined(TARGET_RISCV64) +void emitter::dispIns(instrDesc* id) +{ + // For LoongArch64 using the emitDisInsName(). + NYI_RISCV64("Not used on RISCV64."); +} #else void emitter::dispIns(instrDesc* id) { @@ -2552,7 +2558,7 @@ void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi) #ifdef TARGET_AMD64 // doesn't have to be all negative on amd printf("-%04X ... %04X\n", -offsLo, offsHi); -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (offsHi < 0) printf("-%04X ... -%04X\n", -offsLo, -offsHi); else @@ -2887,7 +2893,7 @@ const char* emitter::emitLabelString(insGroup* ig) return retbuf; } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Does the argument location point to an IG at the end of a function or funclet? 
// We can ignore the codePos part of the location, since it doesn't affect the @@ -3250,7 +3256,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) comp->unwindNop(id->idCodeSize()); #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) comp->unwindNop(); -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_RISCV64) + comp->unwindNop(); +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } /***************************************************************************** @@ -3264,7 +3272,7 @@ void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp) emitWalkIDs(locFrom, emitGenerateUnwindNop, comp); } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 #if EMIT_BACKWARDS_NAVIGATION @@ -3691,6 +3699,10 @@ const size_t hexEncodingSize = 11; #elif defined(TARGET_LOONGARCH64) const size_t basicIndent = 12; const size_t hexEncodingSize = 19; +#elif defined(TARGET_RISCV64) +// TODO RISCV64 +const size_t basicIndent = 12; +const size_t hexEncodingSize = 19; #endif #ifdef DEBUG @@ -4070,6 +4082,7 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction printf("\n"); +#if !defined(TARGET_RISCV64) if (displayInstructions) { instrDesc* id = emitFirstInstrDesc(ig->igData); @@ -4102,6 +4115,7 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction printf("\n"); } } +#endif // !TARGET_RISCV64 } } @@ -4665,7 +4679,7 @@ void emitter::emitRemoveJumpToNextInst() * LoongArch64 has an individual implementation for emitJumpDistBind(). */ -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) void emitter::emitJumpDistBind() { #ifdef DEBUG @@ -6259,8 +6273,8 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { -#ifdef TARGET_LOONGARCH64 - // TODO-LoongArch64: support idDebugOnlyInfo. +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // TODO LoongArch64 / RISCV64: support idDebugOnlyInfo. return; #else @@ -6598,7 +6612,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // For arm64/LoongArch64, we're going to put the data in the code section. So make sure the code section has // adequate alignment. if (emitConsDsc.dsdOffs > 0) @@ -7167,8 +7181,10 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_LOONGARCH64) isJccAffectedIns = true; +#elif defined(TARGET_RISCV64) -#endif // TARGET_LOONGARCH64 + isJccAffectedIns = true; +#endif // TARGET_RISCV64 // Jcc affected instruction boundaries were printed above; handle other cases here. if (!isJccAffectedIns) @@ -7347,8 +7363,8 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARM64) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); -#elif defined(TARGET_LOONGARCH64) - // For LoongArch64 `emitFwdJumps` is always false. +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // For LoongArch64 and Riscv64 `emitFwdJumps` is always false. 
unreached(); #else #error Unsupported or unset target architecture @@ -7363,8 +7379,8 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARMARCH) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); -#elif defined(TARGET_LOONGARCH64) - // For LoongArch64 `emitFwdJumps` is always false. +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // For LoongArch64 and Riscv64 `emitFwdJumps` is always false. unreached(); #else #error Unsupported or unset target architecture @@ -10162,7 +10178,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; -#if !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) case CORINFO_HELP_PROF_FCN_ENTER: result = RBM_PROFILER_ENTER_TRASH; break; @@ -10179,7 +10195,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) case CORINFO_HELP_PROF_FCN_TAILCALL: result = RBM_PROFILER_TAILCALL_TRASH; break; -#endif // !defined(TARGET_LOONGARCH64) +#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) #if defined(TARGET_X86) case CORINFO_HELP_INIT_PINVOKE_FRAME: diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 6fdbd8bf5abd9f..6e9f035743612b 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -650,6 +650,8 @@ class emitter insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. +#elif defined(TARGET_RISCV64) + unsigned _idCodeSize : 6; // the instruction(s) size of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -685,6 +687,16 @@ class emitter void idInsFmt(insFormat insFmt) { } +#elif defined(TARGET_RISCV64) + insFormat idInsFmt() const + { + _ASSERTE(!"TODO RISCV64 NYI"); + return (insFormat)0; + } + void idInsFmt(insFormat insFmt) + { + _ASSERTE(!"TODO RISCV64 NYI"); + } #else insFormat idInsFmt() const { @@ -714,11 +726,11 @@ class emitter opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32 // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) /* _idOpSize defined below. */ #else opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 -#endif // ARM || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 // On Amd64, this is where the second DWORD begins // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that @@ -773,6 +785,13 @@ class emitter unsigned _idLclVar : 1; // access a local on stack. 
#endif +#ifdef TARGET_RISCV64 + // TODO RISCV64 + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // options for instructions + unsigned _idLclVar : 1; // access a local on stack +#endif + #ifdef TARGET_ARM insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits insFlags _idInsFlags : 1; // will this instruction set the flags @@ -799,7 +818,7 @@ class emitter #define ID_EXTRA_BITFIELD_BITS (16) #elif defined(TARGET_ARM64) #define ID_EXTRA_BITFIELD_BITS (17) -#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO RISCV64 #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -899,7 +918,7 @@ class emitter // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts // about reading what we think is here, to avoid unexpected corruption issues. -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) emitLclVarAddr iiaLclVar; #endif BasicBlock* iiaBBlabel; @@ -953,7 +972,7 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) struct { unsigned int iiaEncodedInstr; // instruction's binary encoding. @@ -984,7 +1003,7 @@ class emitter { return iiaJmpOffset; } -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } _idAddrUnion; @@ -1097,7 +1116,19 @@ class emitter assert(sz <= 16); _idCodeSize = sz; } -#endif // TARGET_LOONGARCH64 +#elif defined(TARGET_RISCV64) + unsigned idCodeSize() const + { + return _idCodeSize; + } + void idCodeSize(unsigned sz) + { + // RISCV64's instrDesc is not always meaning only one instruction. + // e.g. the `emitter::emitIns_I_la` for emitting the immediates. 
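The comment above is the reason the size cap just below is 32 bytes: a pseudo-op such as loading a 64-bit immediate can expand to several 4-byte instructions under one instrDesc. A standalone sketch of that worst-case accounting (an estimate, not the emitter's exact expansion logic):

#include <cassert>
#include <cstdint>

// Standalone sketch of why one RISCV64 instrDesc may describe up to 32 bytes of code:
// a pseudo-op that materializes a 64-bit constant can expand to as many as eight
// 4-byte instructions (lui + addiw for the upper 32 bits, then slli/addi pairs for
// the rest).  This is a worst-case estimate, not the emitter's exact expansion logic.
unsigned worstCaseLoadImmBytes(int64_t imm)
{
    if ((imm >= -2048) && (imm <= 2047))
    {
        return 4; // a single addi from the zero register
    }
    if (imm == (int32_t)imm)
    {
        return 8; // lui + addiw
    }
    return 8 + 3 * 8; // lui + addiw, then three slli/addi pairs: 32 bytes
}

int main()
{
    assert(worstCaseLoadImmBytes(100) == 4);
    assert(worstCaseLoadImmBytes(0x12345678) == 8);
    assert(worstCaseLoadImmBytes(0x123456789ABCDEF0) == 32); // matches the idCodeSize cap below
    return 0;
}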
+ assert(sz <= 32); + _idCodeSize = sz; + } +#endif emitAttr idOpSize() { @@ -1280,6 +1311,42 @@ class emitter #endif // TARGET_LOONGARCH64 +#ifdef TARGET_RISCV64 + insOpts idInsOpt() const + { + return (insOpts)_idInsOpt; + } + void idInsOpt(insOpts opt) + { + _idInsOpt = opt; + assert(opt == _idInsOpt); + } + + regNumber idReg3() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg3; + } + void idReg3(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg3 = reg; + assert(reg == idAddr()->_idReg3); + } + regNumber idReg4() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg4; + } + void idReg4(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg4 = reg; + assert(reg == idAddr()->_idReg4); + } + +#endif // TARGET_RISCV64 + inline static bool fitsInSmallCns(ssize_t val) { return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS)); @@ -1379,6 +1446,17 @@ class emitter } #endif // TARGET_LOONGARCH64 +#ifdef TARGET_RISCV64 + bool idIsLclVar() const + { + return _idLclVar != 0; + } + void idSetIsLclVar() + { + _idLclVar = 1; + } +#endif // TARGET_RISCV64 + bool idIsCnsReloc() const { return _idCnsReloc != 0; @@ -1570,6 +1648,24 @@ class emitter #define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C #define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C +#elif defined(TARGET_RISCV64) +// a read,write or modify from stack location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C + +// a read, write or modify from constant location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C + +// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache +// plus an extra cost (of 1.0) for a increased chance of a cache miss +#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C +#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C + + #endif // TARGET_XXX // Make this an enum: @@ -2020,9 +2116,9 @@ class emitter #endif // defined(TARGET_X86) #endif // !defined(HOST_64BIT) -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) unsigned int emitCounts_INS_OPTS_J; -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 instrDesc* emitFirstInstrDesc(BYTE* idData); void emitAdvanceInstrDesc(instrDesc** id, size_t idSize); @@ -2341,7 +2437,7 @@ class emitter void emitPrintLabel(insGroup* ig); const char* emitLabelString(insGroup* ig); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); @@ -2561,6 +2657,9 @@ class emitter #elif defined(TARGET_LOONGARCH64) bool emitInsMayWriteToGCReg(instruction ins); bool emitInsWritesToLclVarStackLoc(instrDesc* id); +#elif defined(TARGET_RISCV64) + bool emitInsMayWriteToGCReg(instruction ins); + bool emitInsWritesToLclVarStackLoc(instrDesc* id); #endif // TARGET_LOONGARCH64 /************************************************************************/ @@ -2867,7 +2966,9 @@ class emitter inline void emitter::instrDesc::checkSizes() { +#ifndef TARGET_RISCV64 // TODO 
RISCV64 C_ASSERT(SMALL_IDSC_SIZE == offsetof(instrDesc, _idAddrUnion)); +#endif // TARGET_RISCV64 } /***************************************************************************** diff --git a/src/coreclr/jit/emitdef.h b/src/coreclr/jit/emitdef.h index 35b46314a1225a..1d261919e7e51d 100644 --- a/src/coreclr/jit/emitdef.h +++ b/src/coreclr/jit/emitdef.h @@ -14,6 +14,8 @@ #include "emitarm64.h" #elif defined(TARGET_LOONGARCH64) #include "emitloongarch64.h" +#elif defined(TARGET_RISCV64) +#include "emitriscv64.h" #else #error Unsupported or unset target architecture #endif diff --git a/src/coreclr/jit/emitfmts.h b/src/coreclr/jit/emitfmts.h index 77712ed95cce3d..883b4458d94290 100644 --- a/src/coreclr/jit/emitfmts.h +++ b/src/coreclr/jit/emitfmts.h @@ -10,6 +10,8 @@ #include "emitfmtsarm64.h" #elif defined(TARGET_LOONGARCH64) #include "emitfmtsloongarch64.h" +#elif defined(TARGET_RISCV64) +#include "emitfmtsriscv64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitfmtsriscv64.h b/src/coreclr/jit/emitfmtsriscv64.h new file mode 100644 index 00000000000000..31bf86c7f14f9a --- /dev/null +++ b/src/coreclr/jit/emitfmtsriscv64.h @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +////////////////////////////////////////////////////////////////////////////// + +// clang-format off +#if !defined(TARGET_RISCV64) +#error Unexpected target type +#endif + +#ifdef DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +enum ID_OPS +{ + ID_OP_NONE, // no additional arguments +}; + +#undef DEFINE_ID_OPS + +////////////////////////////////////////////////////////////////////////////// +#else // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#ifndef IF_DEF +#error Must define IF_DEF macro before including this file +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// enum insFormat instruction enum ID_OPS +// scheduling +// (unused) +////////////////////////////////////////////////////////////////////////////// + +IF_DEF(NONE, IS_NONE, NONE) + +///////////////////////////////////////////////////////////////////////////////////////////////////////// +#undef IF_DEF +////////////////////////////////////////////////////////////////////////////// + +#endif // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// +// clang-format on diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 125c1ddd0fbd3f..90498a4df2083b 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -362,6 +362,42 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) id->idReg2((regNumber)encodeMask); // Save in idReg2 +#elif defined(TARGET_RISCV64) + assert(REGNUM_BITS >= 6); + encodeMask = 0; + + //if ((regmask & RBM_S0) != RBM_NONE) + // encodeMask |= 0x01; + if ((regmask & RBM_S1) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S2) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S3) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S4) != RBM_NONE) + encodeMask |= 0x08; + if ((regmask & RBM_S5) != RBM_NONE) + encodeMask |= 0x10; + + id->idReg1((regNumber)encodeMask); // Save in idReg1 + + encodeMask = 0; + + if ((regmask & RBM_S6) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S7) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask 
& RBM_S8) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S9) != RBM_NONE) + encodeMask |= 0x08; + if ((regmask & RBM_S10) != RBM_NONE) + encodeMask |= 0x10; + if ((regmask & RBM_S11) != RBM_NONE) + encodeMask |= 0x20; + + id->idReg2((regNumber)encodeMask); // Save in idReg2 + #else NYI("unknown target"); #endif @@ -500,6 +536,39 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) if ((encodeMask & 0x08) != 0) regmask |= RBM_S8; +#elif defined(TARGET_RISCV64) + assert(REGNUM_BITS >= 6); + encodeMask = id->idReg1(); + + //if ((encodeMask & 0x01) != 0) // TODO CHECK S0 is FP + // regmask |= RBM_S0; + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S1; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S2; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S3; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S4; + if ((encodeMask & 0x10) != 0) + regmask |= RBM_S5; + if ((encodeMask & 0x20) != 0) + regmask |= RBM_S6; + + encodeMask = id->idReg2(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S7; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S8; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S9; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S10; + if ((encodeMask & 0x10) != 0) + regmask |= RBM_S11; + + #else NYI("unknown target"); #endif diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index cd10727f6eec32..740634675c4e9b 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -53,6 +53,12 @@ JMP_SMALL(jmp , jmp , b ) JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE +#elif defined(TARGET_RISCV64) +// TODO RISCV64: adding other condition branches +JMP_SMALL(jmp , jmp , j ) +JMP_SMALL(eq , ne , beq ) // EQ +JMP_SMALL(ne , eq , bne ) // NE + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index 0133fb19f0212d..9fcb29f42b7dbc 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -133,7 +133,7 @@ static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADE /* Interface for generating unwind information */ /************************************************************************/ -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL); @@ -145,7 +145,7 @@ void emitSplit(emitLocation* startLoc, void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp); -#endif // TARGET_ARMARCH || defined(TARGET_LOONGARCH64) +#endif // TARGET_ARMARCH || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(TARGET_ARM) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp new file mode 100644 index 00000000000000..0c5a4e8f8918ea --- /dev/null +++ b/src/coreclr/jit/emitriscv64.cpp @@ -0,0 +1,4282 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX emitriscv64.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_RISCV64) + +/*****************************************************************************/ +/*****************************************************************************/ + +#include "instr.h" +#include "emit.h" +#include "codegen.h" + +/*****************************************************************************/ + +const instruction emitJumpKindInstructions[] = { + INS_nop, + +#define JMP_SMALL(en, rev, ins) INS_##ins, +#include "emitjmps.h" +}; + +const emitJumpKind emitReverseJumpKinds[] = { + EJ_NONE, + +#define JMP_SMALL(en, rev, ins) EJ_##rev, +#include "emitjmps.h" +}; + +/***************************************************************************** + * Look up the instruction for a jump kind + */ + +/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) +{ + assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); + return emitJumpKindInstructions[jumpKind]; +} + +/***************************************************************************** +* Look up the jump kind for an instruction. It better be a conditional +* branch instruction with a jump kind! +*/ + +/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) +{ + NYI_RISCV64("emitInsToJumpKind-----unimplemented on RISCV64 yet----"); + return EJ_NONE; +} + +/***************************************************************************** + * Reverse the conditional jump + */ + +/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) +{ + assert(jumpKind < EJ_COUNT); + return emitReverseJumpKinds[jumpKind]; +} + +/***************************************************************************** + * + * Return the allocated size (in bytes) of the given instruction descriptor. + */ + +size_t emitter::emitSizeOfInsDsc(instrDesc* id) +{ + if (emitIsScnsInsDsc(id)) + return SMALL_IDSC_SIZE; + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_JALR: + case INS_OPTS_J_cond: + case INS_OPTS_J: + return sizeof(instrDescJmp); + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + return sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + return sizeof(instrDesc); + } + + case INS_OPTS_I: + case INS_OPTS_RC: + case INS_OPTS_RL: + case INS_OPTS_RELOC: + case INS_OPTS_NONE: + return sizeof(instrDesc); + default: + NO_WAY("unexpected instruction descriptor format"); + break; + } +} + + + +bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; + + instruction ins = id->idIns(); + + // This list is related to the list of instructions used to store local vars in emitIns_S_R(). + // We don't accept writing to float local vars. 
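+    // (fsw/fsd, which emitIns_S_R() can also emit, are deliberately absent from the switch
+    // below: a floating-point store never writes a GC reference, so those stores do not
+    // need to be reported here.)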
+ + switch (ins) + { + case INS_sd: + case INS_sw: + case INS_sb: + case INS_sh: + return true; + + default: + return false; + } +} + +#define LD 1 +#define ST 2 + +// clang-format off +/*static*/ const BYTE CodeGenInterface::instInfo[] = +{ + #define INST(id, nm, info, e1) info, + #include "instrs.h" +}; +// clang-format on + +inline bool emitter::emitInsMayWriteToGCReg(instruction ins) +{ + assert(ins != INS_invalid); + return (ins <= INS_remuw) && (ins >= INS_mov) && + !(ins >= INS_jal && ins <= INS_bgeu && ins != INS_jalr) && + (CodeGenInterface::instInfo[ins] & ST) == 0 ? true : false; // TODO CHECK +} + +//------------------------------------------------------------------------ +// emitInsLoad: Returns true if the instruction is some kind of load instruction. +// +bool emitter::emitInsIsLoad(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LD) != 0; + else + return false; +} + +//------------------------------------------------------------------------ +// emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// +bool emitter::emitInsIsStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & ST) != 0; + else + return false; +} + +//------------------------------------------------------------------------- +// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. +// +bool emitter::emitInsIsLoadOrStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; + else + return false; +} + +/***************************************************************************** + * + * Returns the specific encoding of the given CPU instruction. + */ + +inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) +{ + code_t code = BAD_CODE; + + // clang-format off + const static code_t insCode[] = + { + #define INST(id, nm, info, e1) e1, + #include "instrs.h" + }; + // clang-format on + + code = insCode[ins]; + + assert((code != BAD_CODE)); + + return code; +} + +/**************************************************************************** + * + * Add an instruction with no operands. + */ + +void emitter::emitIns(instruction ins) +{ + instrDesc* id = emitNewInstr(EA_8BYTE); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * emitter::emitIns_S_R() and emitter::emitIns_R_S(): + * + * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. 
+ * For referencing a stack-based local variable and a register + * + */ +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, regNumber tmpReg, int varx, int offs) +{ + ssize_t imm; + + assert(tmpReg != REG_RA); + + emitAttr size = EA_SIZE(attr); + +#ifdef DEBUG + switch (ins) + { + case INS_sd: + case INS_sw: + case INS_fsw: + case INS_fsd: + case INS_sb: + case INS_sh: + break; + + default: + NYI("emitIns_S_R"); + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs - 8 : base + offs; + + regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE; + // assert(offs >= 0); + regNumber reg2 = offs < 0 ? tmpReg : reg3; // TODO R21 => tmpReg + assert(reg2 != REG_NA && reg2 != REG_RA); + + // regNumber reg2 = reg3; + offs = offs < 0 ? -offs - 8 : offs; + + if ((-2048 <= imm) && (imm < 2048)) + { + // regs[1] = reg2; + } + else + { + // ssize_t imm3 = imm & 0x800; + // ssize_t imm2 = imm + imm3; + + assert(isValidSimm20((imm + 0x800) >> 12)); + emitIns_R_I(INS_lui, EA_PTRSIZE, REG_RA, (imm + 0x800) >> 12); + + emitIns_R_R_R(INS_add, EA_PTRSIZE, REG_RA, REG_RA, reg2); + // imm2 = imm2 & 0x7ff; + // imm = imm3 ? imm2 - imm3 : imm2; + imm = imm & 0xfff; + reg2 = REG_RA; + } + + if (tmpReg != REG_NA) + { + emitIns_R_R_R(INS_add, attr, reg2, reg2, reg3); + imm = 0; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idReg2(reg2); + + id->idIns(ins); + + code_t code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f) << 20; + code |= (code_t)(reg2 & 0x1f) << 15; + code |= (((imm >> 5) & 0x7f) << 25) | ((imm & 0x1f) << 7); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/* + * Special notes for `offs`, please see the comment for `emitter::emitIns_S_R`. + */ +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + ssize_t imm; + + emitAttr size = EA_SIZE(attr); + +#ifdef DEBUG + switch (ins) + { + case INS_lb: + case INS_lbu: + + case INS_lh: + case INS_lhu: + + case INS_lw: + case INS_lwu: + case INS_flw: + + case INS_ld: + case INS_fld: + + break; + + case INS_lea: + assert(size == EA_8BYTE); + break; + + default: + NYI("emitIns_R_S"); + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs - 8 : base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + assert(offs >= 0); + //reg2 = offs < 0 ? REG_R21 : reg2; // TODO + offs = offs < 0 ? 
-offs - 8 : offs; + + reg1 = (regNumber)((char)reg1 & 0x1f); + code_t code; + if ((-2048 <= imm) && (imm < 2048)) + { + if (ins == INS_lea) + { + ins = INS_addi; + } + code = emitInsCode(ins); + code |= (code_t)reg1 << 7; + code |= (code_t)reg2 << 15; + code |= (imm & 0xfff) << 20; + } + else + { + if (ins == INS_lea) + { + assert(isValidSimm20((imm + 0x800) >> 12)); + emitIns_R_I(INS_lui, EA_PTRSIZE, REG_RA, (imm + 0x800) >> 12); + ssize_t imm2 = imm & 0xfff; + emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_RA, REG_RA, imm2); + + ins = INS_add; + code = emitInsCode(ins); + code |= (code_t)reg1 << 7; + code |= (code_t)reg2 << 15; + code |= (code_t)REG_RA << 20; + } + else + { + assert(isValidSimm20((imm + 0x800) >> 12)); + emitIns_R_I(INS_lui, EA_PTRSIZE, REG_RA, (imm + 0x800) >> 12); + + emitIns_R_R_R(INS_add, EA_PTRSIZE, REG_RA, REG_RA, reg2); + + ssize_t imm2 = imm & 0xfff; + code = emitInsCode(ins); + code |= (code_t)reg1 << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)(imm2 & 0xfff) << 20; + } + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idIns(ins); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a single immediate value. + */ + +void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_fence: + code |= ((imm & 0xff) << 20); + break; + case INS_j: + assert(imm >= -1048576 && imm < 1048576); + code |= ((imm >> 12) & 0xff) << 12; + code |= ((imm >> 11) & 0x1) << 20; + code |= ((imm >> 1) & 0x3ff) << 21; + code |= ((imm >> 20) & 0x1) << 31; + break; + default: + fprintf(stderr, "emitIns_I %llx %llx\n", ins, code); + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +/***************************************************************************** + * + * Add an instruction referencing a register and a constant. 
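+ *  Only two encodings are handled here: the U-type forms (lui/auipc place a 20-bit
+ *  immediate in instruction bits [31:12]) and the J-type form (jal packs a 21-bit signed,
+ *  2-byte-aligned offset as imm[20|10:1|11|19:12] into bits [31|30:21|20|19:12]);
+ *  anything else is rejected via unreached().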
+ */ + +void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + switch(ins) + { + case INS_lui: + case INS_auipc: + assert(reg != REG_R0); + assert(isGeneralRegister(reg)); + assert((((size_t)imm) >> 20) == 0); + + code |= reg << 7; + code |= (imm & 0xfffff) << 12; + break; + case INS_jal: + assert(isGeneralRegisterOrR0(reg)); + assert(imm >= -1048576 && imm < 1048576); + + code != reg << 7; + code |= ((imm >> 12) & 0xff) << 12; + code |= ((imm >> 11) & 0x1) << 20; + code |= ((imm >> 1) & 0x3ff) << 21; + code |= ((imm >> 20) & 0x1) << 31; + break; + default: + fprintf(stderr, "emitIns_R_I %llx %llx\n", ins, code); + unreached(); + break; + } // end switch (ins) + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) +{ + // assert(IsMovInstruction(ins)); + if (!canSkip || (dstReg != srcReg)) + { + if ((EA_4BYTE == attr) && (INS_mov == ins)) + emitIns_R_R_I(INS_addiw, attr, dstReg, srcReg, 0); + else if (INS_fsgnj_s == ins || INS_fsgnj_d == ins) + emitIns_R_R_R(ins, attr, dstReg, srcReg, srcReg); + else if (genIsValidFloatReg(srcReg) || genIsValidFloatReg(dstReg)) + emitIns_R_R(ins, attr, dstReg, srcReg); + else + emitIns_R_R_I(INS_addi, attr, dstReg, srcReg, 0); + } +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ + +void emitter::emitIns_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if (INS_mov == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1 << 7; + code |= reg2 << 15; + } + else if ((INS_fcvt_w_s <= ins && INS_fmv_x_w >= ins) || + (INS_fclass_s == ins || INS_fclass_d == ins) || + (INS_fcvt_w_d == ins || INS_fcvt_wu_d == ins) || + (INS_fcvt_l_s == ins || INS_fcvt_lu_s == ins) || + (INS_fmv_x_d == ins)) + { + // TODO CHECK ROUNDING MODE + assert(isGeneralRegisterOrR0(reg1)); + assert(isFloatReg(reg2)); + code |= (reg1 & 0x1f) << 7; + code |= (reg2 & 0x1f) << 15; + } + else if ((INS_fcvt_s_w <= ins && INS_fmv_w_x >= ins) || + (INS_fcvt_d_w == ins || INS_fcvt_d_wu == ins) || + (INS_fcvt_s_l == ins || INS_fcvt_s_lu == ins) || + (INS_fmv_d_x == ins)) + + { + // TODO CHECK ROUNDING MODE + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= (reg1 & 0x1f) << 7; + code |= (reg2 & 0x1f) << 15; + } + else if (INS_fcvt_s_d == ins || INS_fcvt_d_s == ins) + { + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + code |= (reg1 & 0x1f) << 7; + code |= (reg2 & 0x1f) << 15; + } + else + { + NYI_RISCV64("illegal ins within emitIns_R_R!"); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + 
id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and a constant. + */ + +void emitter::emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + if ((INS_addi <= ins && INS_srai >= ins) || + (INS_addiw <= ins && INS_sraiw >= ins) || + (INS_lb <= ins && INS_lhu >= ins) || + INS_ld == ins || INS_lw == ins || INS_jalr == ins || + INS_fld == ins || INS_flw == ins) + { + code |= (reg1 & 0x1f) << 7; // rd + code |= (reg2 & 0x1f) << 15; // rs1 + code |= imm << 20; // imm + } + else if (INS_sd == ins || INS_sw == ins || INS_sh == ins || INS_sb == ins || INS_fsw == ins || INS_fsd == ins) + { + code |= (reg1 & 0x1f) << 20; // rs2 + code |= (reg2 & 0x1f) << 15; // rs1 + code |= (((imm >> 5) & 0x7f) << 25) | ((imm & 0x1f) << 7); // imm + } + else if (INS_beq <= ins && INS_bgeu >= ins) + { + code |= (reg1 & 0x1f) << 15; + code |= (reg2 & 0x1f) << 20; + code |= ((imm >> 11) & 0x1) << 7; + code |= ((imm >> 1) & 0xf) << 8; + code |= ((imm >> 5) & 0x3f) << 25; + code |= ((imm >> 12) & 0x1) << 31; + } + else + { + fprintf(stderr, "Not implemented instruction in R_R_I: 0x%x\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers. + */ + +void emitter::emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */ +{ + code_t code = emitInsCode(ins); + + if ((INS_add <= ins && ins <= INS_and) || + (INS_mul <= ins && ins <= INS_remuw) || + (INS_addw <= ins && ins <= INS_sraw) || + (INS_fadd_s <= ins && ins <= INS_fmax_s) || + (INS_fadd_d <= ins && ins <= INS_fmax_d) || + (INS_feq_s <= ins && ins <= INS_fle_s) || + (INS_feq_d <= ins && ins <= INS_fle_d)) + { +#ifdef DEBUG + switch (ins) + { + case INS_add: + case INS_sub: + case INS_sll: + case INS_slt: + case INS_sltu: + case INS_xor: + case INS_srl: + case INS_sra: + case INS_or: + case INS_and: + + case INS_addw: + case INS_subw: + case INS_sllw: + case INS_srlw: + case INS_sraw: + + case INS_mul: + case INS_mulh: + case INS_mulhsu: + case INS_mulhu: + case INS_div: + case INS_divu: + case INS_rem: + case INS_remu: + + case INS_mulw: + case INS_divw: + case INS_divuw: + case INS_remw: + case INS_remuw: + + case INS_fadd_s: + case INS_fsub_s: + case INS_fmul_s: + case INS_fdiv_s: + case INS_fsqrt_s: + case INS_fsgnj_s: + case INS_fsgnjn_s: + case INS_fsgnjx_s: + case INS_fmin_s: + case INS_fmax_s: + + case INS_feq_s: + case INS_flt_s: + case INS_fle_s: + + case INS_fadd_d: + case INS_fsub_d: + case INS_fmul_d: + case INS_fdiv_d: + case INS_fsqrt_d: + case INS_fsgnj_d: + case INS_fsgnjn_d: + case INS_fsgnjx_d: + case INS_fmin_d: + case INS_fmax_d: + + case INS_feq_d: + case INS_flt_d: + case INS_fle_d: + + break; + default: + NYI_RISCV64("illegal ins within emitIns_R_R_R --1!"); + } + +#endif + code |= ((reg1 & 0x1f) << 7); + code |= ((reg2 & 0x1f) << 15); + code |= ((reg3 & 0x1f) << 20); + } + else + { + fprintf(stderr, "Not implemented instruction in R_R_R: 0x%x\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); 
+ } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers and a constant. + */ + +void emitter::emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and two constants. + */ + +void emitter::emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +/***************************************************************************** + * + * Add an instruction referencing four registers. + */ + +void emitter::emitIns_R_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +/***************************************************************************** + * + * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * + */ +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + assert(offs >= 0); + assert(instrDesc::fitsInSmallCns(offs)); // can optimize. + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idSmallCns(offs); // usually is 0. + id->idInsOpt(INS_OPTS_RC); + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + id->idCodeSize(16); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + // TODO-RISCV64: this maybe deleted. + id->idSetIsBound(); // We won't patch address since we will know the exact distance + // once JIT code and data are allocated together. + + assert(addrReg == REG_NA); // NOTE: for LOONGARCH64, not support addrReg != REG_NA. + + id->idAddr()->iiaFieldHnd = fldHnd; + + appendToCurIG(id); +} + +void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ + NYI_RISCV64("emitIns_R_AR-----unimplemented/unused on RISCV64 yet----"); +} + +// This computes address from the immediate which is relocatable. +void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC + assert(ins == INS_jal); // for special. + assert(isGeneralRegister(reg)); + + // INS_OPTS_RELOC: placeholders. 
2-ins: + // case:EA_HANDLE_CNS_RELOC + // auipc reg, off-hi-20bits + // addi reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // auipc reg, off-hi-20bits + // ld reg, reg, off-lo-12bits + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idInsOpt(INS_OPTS_RELOC); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idCodeSize(8); + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Record that a jump instruction uses the short encoding + * + */ +void emitter::emitSetShortJump(instrDescJmp* id) +{ + // TODO-RISCV64: maybe delete it on future. + NYI_RISCV64("emitSetShortJump-----unimplemented/unused on RISCV64 yet----"); +} + +/***************************************************************************** + * + * Add a label instruction. + */ + +void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + assert(dst->bbFlags & BBF_HAS_LABEL); + + // if for reloc! 4-ins: + // auipc reg, offset-hi20 + // addi reg, reg, offset-lo12 + // + // else: 3-ins: + // lui tmp, dst-hi-20bits + // addi tmp, tmp, dst-lo-12bits + // lui reg, 0xff << 12 + // slli reg, reg, 32 + // add reg, tmp, reg + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsOpt(INS_OPTS_RL); + id->idAddr()->iiaBBlabel = dst; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } + else + id->idCodeSize(20); + + id->idReg1(reg); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + +#ifdef DEBUG + // Mark the catch return + if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET) + { + id->idDebugOnlyInfo()->idCatchRet = true; + } +#endif // DEBUG + + appendToCurIG(id); +} + +void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + NYI_RISCV64("emitIns_J_R-----unimplemented/unused on RISCV64 yet----"); +} + +void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) +{ + assert(dst != nullptr); + // + // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JALR. + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst + + assert(dst->bbFlags & BBF_HAS_LABEL); + + instrDescJmp* id = emitNewInstrJmp(); + assert((INS_jal <= ins) && (ins <= INS_bgeu)); + id->idIns(ins); + id->idReg1((regNumber)(instrCount & 0x1f)); + id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); + + id->idInsOpt(INS_OPTS_J); + emitCounts_INS_OPTS_J++; + id->idAddr()->iiaBBlabel = dst; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + } + + id->idjShort = false; + + // TODO-RISCV64: maybe deleted this. 
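+    // As on the other targets, idjKeepLong records that this branch may cross the
+    // hot/cold code-region split (fgInDifferentRegions), so a long-range form must be
+    // kept for it regardless of the estimated distance.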
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + id->idjKeepLong = 1; +#endif // DEBUG + + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; + + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; + +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) +{ + // TODO-RISCV64: + // Now the emitIns_J_cond_la() is only the short condition branch. + // There is no long condition branch for RISCV64 so far. + // For RISCV64 , the long condition branch is like this: + // ---> branch_condition condition_target; //here is the condition branch, short branch is enough. + // ---> jump jump_target; (this supporting the long jump.) + // condition_target: + // ... + // ... + // jump_target: + // + // + // INS_OPTS_J_cond: placeholders. 1-ins. + // ins reg1, reg2, dst + + assert(dst != nullptr); + assert(dst->bbFlags & BBF_HAS_LABEL); + + instrDescJmp* id = emitNewInstrJmp(); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idjShort = false; + + id->idInsOpt(INS_OPTS_J_cond); + id->idAddr()->iiaBBlabel = dst; + + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + id->idjKeepLong = 1; +#endif // DEBUG + + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; + + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; + +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + id->idCodeSize(4); + + appendToCurIG(id); +} + +void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) +{ + assert(!EA_IS_RELOC(size)); + assert(isGeneralRegister(reg)); + if (0 == ((imm + 0x800) >> 32)) { + if (((imm + 0x800) >> 12) != 0) + { + emitIns_R_I(INS_lui, size, reg, ((imm + 0x800) >> 12)); + if ((imm & 0xFFF) != 0) + { + emitIns_R_R_I(INS_addiw, size, reg, reg, imm & 0xFFF); + } + } + else + { + emitIns_R_R_I(INS_addiw, size, reg, REG_R0, imm & 0xFFF); + } + } + else + { + UINT32 high = (imm >> 32) & 0xffffffff; + if (((high + 0x800) >> 12) != 0) + { + emitIns_R_I(INS_lui, size, reg, ((high + 0x800) >> 12)); + if ((high & 0xFFF) != 0) + { + emitIns_R_R_I(INS_addi, size, reg, reg, high & 0xFFF); + } + } + else if ((high & 0xFFF) != 0) + { + emitIns_R_R_I(INS_addi, size, reg, REG_R0, high & 0xFFF); + } + UINT32 low = imm & 0xffffffff; + emitIns_R_R_I(INS_slli, size, reg, reg, 11); + emitIns_R_R_I(INS_addi, size, reg, reg, (low >> 21) & 0x7FF); + + emitIns_R_R_I(INS_slli, size, reg, reg, 11); + emitIns_R_R_I(INS_addi, size, reg, reg, (low >> 10) & 0x7FF); + + emitIns_R_R_I(INS_slli, size, reg, reg, 10); + emitIns_R_R_I(INS_addi, size, reg, reg, low & 0x3FF); + } +} + +/***************************************************************************** + * + * Add a call instruction (direct or indirect). + * argSize<0 means that the caller will pop the arguments + * + * The other arguments are interpreted depending on callType as shown: + * Unless otherwise specified, ireg,xreg,xmul,disp should have default values. + * + * EC_FUNC_TOKEN : addr is the method address + * + * If callType is one of these emitCallTypes, addr has to be NULL. 
+ * EC_INDIR_R : "call ireg". + * + */ + +void emitter::emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di /* = DebugInfo() */, + regNumber ireg /* = REG_NA */, + regNumber xreg /* = REG_NA */, + unsigned xmul /* = 0 */, + ssize_t disp /* = 0 */, + bool isJump /* = false */) +{ + /* Sanity check the arguments depending on callType */ + + assert(callType < EC_COUNT); + assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); + assert(callType < EC_INDIR_R || addr == NULL); + assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); + + // RISCV64 never uses these + assert(xreg == REG_NA && xmul == 0 && disp == 0); + + // Our stack level should be always greater than the bytes of arguments we push. Just + // a sanity test. + assert((unsigned)abs(argSize) <= codeGen->genStackLevel); + + // Trim out any callee-trashed registers from the live set. + regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); + gcrefRegs &= savedSet; + byrefRegs &= savedSet; + +#ifdef DEBUG + if (EMIT_GC_VERBOSE) + { + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); + dumpConvertedVarSet(emitComp, ptrVars); + printf(", gcrefRegs="); + printRegMaskInt(gcrefRegs); + emitDispRegSet(gcrefRegs); + printf(", byrefRegs="); + printRegMaskInt(byrefRegs); + emitDispRegSet(byrefRegs); + printf("\n"); + } +#endif + + /* Managed RetVal: emit sequence point for the call */ + if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + { + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + } + + /* + We need to allocate the appropriate instruction descriptor based + on whether this is a direct/indirect call, and whether we need to + record an updated set of live GC variables. + */ + instrDesc* id; + + assert(argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + + if (callType >= EC_INDIR_R) + { + /* Indirect call, virtual calls */ + + assert(callType == EC_INDIR_R); + + id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + else + { + /* Helper/static/nonvirtual/function calls (direct or through handle), + and calls to an absolute addr. */ + + assert(callType == EC_FUNC_TOKEN); + + id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + + /* Update the emitter's live GC ref sets */ + + VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + emitThisGCrefRegs = gcrefRegs; + emitThisByrefRegs = byrefRegs; + + id->idSetIsNoGC(emitNoGChelper(methHnd)); + + /* Set the instruction - special case jumping a function */ + instruction ins; + + ins = INS_jalr; // jalr + id->idIns(ins); + + id->idInsOpt(INS_OPTS_C); + // TODO-RISCV64: maybe optimize. + + // INS_OPTS_C: placeholders. 1/2/4-ins: + // if (callType == EC_INDIR_R) + // jalr REG_R0/REG_RA, ireg, 0 <---- 1-ins + // else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR) + // if reloc: + // //pc + offset_38bits # only when reloc. 
+ // auipc t2, addr-hi20 + // jalr r0/1, t2, addr-lo12 + // + // else: + // lui t2, dst_offset_lo32-hi + // ori t2, t2, dst_offset_lo32-lo + // lui t2, dst_offset_hi32-lo + // jalr REG_R0/REG_RA, t2, 0 + + /* Record the address: method, indirection, or funcptr */ + if (callType == EC_INDIR_R) + { + /* This is an indirect call (either a virtual call or func ptr call) */ + // assert(callType == EC_INDIR_R); + + id->idSetIsCallRegPtr(); + + regNumber reg_jalr = isJump ? REG_R0 : REG_RA; + id->idReg4(reg_jalr); + id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. + assert(xreg == REG_NA); + + id->idCodeSize(4); + } + else + { + /* This is a simple direct call: "call helper/method/addr" */ + + assert(callType == EC_FUNC_TOKEN); + assert(addr != NULL); + // assert((((size_t)addr) & 3) == 0); // TODO NEED TO CHECK ALIGNMENT (ex. Address of RNGCHKFAIL is 0x2033b32 in my test.) + + addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 + id->idAddr()->iiaAddr = (BYTE*)addr; + + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); // TODO NEED TO CHECK LATER + } + else + { + id->idCodeSize(32); // TODO NEED TO CHECK LATER // UPDATED BASED on CALLINSTRSIZE in emitOutputCall 6 << 2 + } + } + +#ifdef DEBUG + if (EMIT_GC_VERBOSE) + { + if (id->idIsLargeCall()) + { + printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, + VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); + } + } + + id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token + id->idDebugOnlyInfo()->idCallSig = sigInfo; +#endif // DEBUG + +#ifdef LATE_DISASM + if (addr != nullptr) + { + codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); + } +#endif // LATE_DISASM + + appendToCurIG(id); +} + +/***************************************************************************** + * + * Output a call instruction. + */ + +unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) +{ + unsigned char callInstrSize = sizeof(code_t); // 4 bytes + regMaskTP gcrefRegs; + regMaskTP byrefRegs; + + VARSET_TP GCvars(VarSetOps::UninitVal()); + + // Is this a "fat" call descriptor? + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + gcrefRegs = idCall->idcGcrefRegs; + byrefRegs = idCall->idcByrefRegs; + VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + + gcrefRegs = emitDecodeCallGCregs(id); + byrefRegs = 0; + VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); + } + + /* We update the GC info before the call as the variables cannot be + used by the call. Killing variables before the call helps with + boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029. + If we ever track aliased variables (which could be used by the + call), we would have to keep them alive past the call. */ + + emitUpdateLiveGCvars(GCvars, dst); +#ifdef DEBUG + // NOTEADD: + // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. 
+ if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + { + emitDispGCVarDelta(); // define in emit.cpp + } +#endif // DEBUG + + assert(id->idIns() == INS_jalr); + if (id->idIsCallRegPtr()) + { // EC_INDIR_R + code = emitInsCode(id->idIns()); + code |= (code_t)id->idReg4() << 7; + code |= (code_t)id->idReg3() << 15; + // the offset default is 0; + emitOutput_Instr(dst, code); + } + else if (id->idIsReloc()) + { + // pc + offset_32bits + // + // auipc t2, addr-hi20 + // jalr r0/1,t2,addr-lo12 + + emitOutput_Instr(dst, 0x00000397); + + size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. + + int reg2 = ((int)addr & 1) + 10; + addr = addr ^ 1; + + assert(isValidSimm32(addr - (ssize_t)dst)); + assert((addr & 1) == 0); + + dst += 4; + emitGCregDeadUpd(REG_T2, dst); + +#ifdef DEBUG + code = emitInsCode(INS_auipc); + assert((code | (7 << 7)) == 0x00000397); + assert((int)REG_T2 == 7); + code = emitInsCode(INS_jalr); + assert(code == 0x00000067); +#endif + emitOutput_Instr(dst, 0x00000067 | (7 << 15) | reg2 << 7); + + emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_JALR); + // TODO CHECK HOW TO PATCH RELOCATION ADDRESS + } + else + { + // lui t2, dst_offset_hi32-hi + // addi t2, t2, dst_offset_hi32-lo + // slli t2, t2, 11 + // addi t2, t2, dst_offset_low32-hi + // slli t2, t2, 11 + // addi t2, t2, dst_offset_low32-md + // slli t2, t2, 10 + // jalr t2 + + + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + assert((imm >> 32) <= 0xff); + + int reg2 = (int)(imm & 1); + imm -= reg2; + + UINT32 high = imm >> 32; + code = emitInsCode(INS_lui); + code |= (code_t)REG_T2 << 7; + code |= ((code_t)((high + 0x800) >> 12) & 0xfffff) << 12; + emitOutput_Instr(dst, code); + dst += 4; + + emitGCregDeadUpd(REG_T2, dst); + + code = emitInsCode(INS_addi); + code |= (code_t)REG_T2 << 7; + code |= (code_t)REG_T2 << 15; + code |= (code_t)(high & 0xfff) << 20; + emitOutput_Instr(dst, code); + dst += 4; + + code = emitInsCode(INS_slli); + code |= (code_t)REG_T2 << 7; + code |= (code_t)REG_T2 << 15; + code |= (code_t)(11 << 20); + emitOutput_Instr(dst, code); + dst += 4; + + UINT32 low = imm & 0xffffffff; + + code = emitInsCode(INS_addi); + code |= (code_t)REG_T2 << 7; + code |= (code_t)REG_T2 << 15; + code |= ((low >> 21) & 0x7ff) << 20; + emitOutput_Instr(dst, code); + dst += 4; + + code = emitInsCode(INS_slli); + code |= (code_t)REG_T2 << 7; + code |= (code_t)REG_T2 << 15; + code |= (code_t)(11 << 20); + emitOutput_Instr(dst, code); + dst += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)REG_T2 << 7; + code |= (code_t)REG_T2 << 15; + code |= ((low >> 10) & 0x7ff) << 20; + emitOutput_Instr(dst, code); + dst += 4; + + code = emitInsCode(INS_slli); + code |= (code_t)REG_T2 << 7; + code |= (code_t)REG_T2 << 15; + code |= (code_t)(10 << 20); + emitOutput_Instr(dst, code); + dst += 4; + + code = emitInsCode(INS_jalr); + code |= (code_t)(reg2 & 0x1f) << 7; + code |= (code_t)REG_T2 << 15; + code |= (low & 0x3ff) << 20; + // the offset default is 0; + emitOutput_Instr(dst, code); + } + + dst += 4; + + // If the method returns a GC ref, mark INTRET (A0) appropriately. 
+ if (id->idGCref() == GCT_GCREF) + { + gcrefRegs |= RBM_INTRET; + } + else if (id->idGCref() == GCT_BYREF) + { + byrefRegs |= RBM_INTRET; + } + + // If is a multi-register return method is called, mark INTRET_1 (A1) appropriately + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + if (idCall->idSecondGCref() == GCT_GCREF) + { + gcrefRegs |= RBM_INTRET_1; + } + else if (idCall->idSecondGCref() == GCT_BYREF) + { + byrefRegs |= RBM_INTRET_1; + } + } + + // If the GC register set has changed, report the new set. + if (gcrefRegs != emitThisGCrefRegs) + { + emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + } + // If the Byref register set has changed, report the new set. + if (byrefRegs != emitThisByrefRegs) + { + emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + } + + // Some helper calls may be marked as not requiring GC info to be recorded. + if (!id->idIsNoGC()) + { + // On RISCV64, as on AMD64 and LOONGARCH64, we don't change the stack pointer to push/pop args. + // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism + // to record the call for GC info purposes. (It might be best to use an alternate call, + // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.) + emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0); + + // Do we need to record a call location for GC purposes? + // + if (!emitFullGCinfo) + { + emitRecordGCcall(dst, callInstrSize); + } + } + if (id->idIsCallRegPtr()) + { + callInstrSize = 1 << 2; + } + else + { + callInstrSize = id->idIsReloc() ? (2 << 2) : (8 << 2); // INS_OPTS_C: 2/9-ins. + + } + + return callInstrSize; +} + +void emitter::emitJumpDistBind() // TODO NEED TO CHECK WHAT NUMBERS MEAN +{ +#ifdef DEBUG + if (emitComp->verbose) + { + printf("*************** In emitJumpDistBind()\n"); + } + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nInstruction list before jump distance binding:\n\n"); + emitDispIGlist(true); + } +#endif + + instrDescJmp* jmp; + + UNATIVE_OFFSET adjIG; + UNATIVE_OFFSET adjSJ; + insGroup* lstIG; +#ifdef DEBUG + insGroup* prologIG = emitPrologIG; +#endif // DEBUG + + // NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating the size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JALR) - sizeof(INS_OPTS_J). + +/*****************************************************************************/ +/* If the default small encoding is not enough, we start again here. */ +/*****************************************************************************/ + +AGAIN: + +#ifdef DEBUG + emitCheckIGList(); +#endif + +#ifdef DEBUG + insGroup* lastIG = nullptr; + instrDescJmp* lastSJ = nullptr; +#endif + + lstIG = nullptr; + adjSJ = 0; + adjIG = 0; + + for (jmp = emitJumpList; jmp; jmp = jmp->idjNext) + { + insGroup* jmpIG; + insGroup* tgtIG; + + UNATIVE_OFFSET jsz; // size of the jump instruction in bytes + + NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset? 
+ UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump + UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative + // offset of the jump + UNATIVE_OFFSET dstOffs; + NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded + +/* Make sure the jumps are properly ordered */ + +#ifdef DEBUG + assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ)); + lastSJ = (lastIG == jmp->idjIG) ? jmp : nullptr; + + assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || + emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow + lastIG = jmp->idjIG; +#endif // DEBUG + + /* Get hold of the current jump size */ + + jsz = jmp->idCodeSize(); + + /* Get the group the jump is in */ + + jmpIG = jmp->idjIG; + + /* Are we in a group different from the previous jump? */ + + if (lstIG != jmpIG) + { + /* Were there any jumps before this one? */ + + if (lstIG) + { + /* Adjust the offsets of the intervening blocks */ + + do + { + lstIG = lstIG->igNext; + assert(lstIG); +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + lstIG->igOffs += adjIG; + assert(IsCodeAligned(lstIG->igOffs)); + } while (lstIG != jmpIG); + } + + /* We've got the first jump in a new group */ + adjSJ = 0; + lstIG = jmpIG; + } + + /* Apply any local size adjustment to the jump's relative offset */ + jmp->idjOffs += adjSJ; + + // If this is a jump via register, the instruction size does not change, so we are done. + CLANG_FORMAT_COMMENT_ANCHOR; + + /* Have we bound this jump's target already? */ + + if (jmp->idIsBound()) + { + /* Does the jump already have the smallest size? */ + + if (jmp->idjShort) + { + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + continue; + } + + tgtIG = jmp->idAddr()->iiaIGlabel; + } + else + { + /* First time we've seen this label, convert its target */ + CLANG_FORMAT_COMMENT_ANCHOR; + + tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); + +#ifdef DEBUG + if (EMITVERBOSE) + { + if (tgtIG) + { + printf(" to %s\n", emitLabelString(tgtIG)); + } + else + { + printf("-- ERROR, no emitter cookie for " FMT_BB "; it is probably missing BBF_HAS_LABEL.\n", + jmp->idAddr()->iiaBBlabel->bbNum); + } + } + assert(tgtIG); +#endif // DEBUG + + /* Record the bound target */ + + jmp->idAddr()->iiaIGlabel = tgtIG; + jmp->idSetIsBound(); + } + + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + /* + In the following distance calculations, if we're not actually + scheduling the code (i.e. reordering instructions), we can + use the actual offset of the jump (rather than the beg/end of + the instruction group) since the jump will not be moved around + and thus its offset is accurate. + + First we need to figure out whether this jump is a forward or + backward one; to do this we simply look at the ordinals of the + group that contains the jump and the target. 
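+           Note that 'psd' was already reduced above by emitCounts_INS_OPTS_J * 12 bytes,
+           so the forward estimate below stays conservative even if every INS_OPTS_J jump
+           in between is later expanded to the larger INS_OPTS_JALR placeholder form.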
+ */ + + srcInstrOffs = jmpIG->igOffs + jmp->idjOffs; + + /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */ + dstOffs = tgtIG->igOffs; + + srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch + + if (jmpIG->igNum < tgtIG->igNum) + { + /* Forward jump */ + + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. + */ + + dstOffs += adjIG; + + /* Compute the distance estimate */ + + jmpDist = dstOffs - srcEncodingOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist - psd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[1] Jump block is at %08X\n", jmpIG->igOffs); + printf("[1] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[1] Jump source is at %08X\n", srcEncodingOffs); + printf("[1] Label block is at %08X\n", dstOffs); + printf("[1] Jump dist. is %04X\n", jmpDist); + if (extra > 0) + { + printf("[1] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_jal <= ins) && (ins <= INS_bgeu)); + + if (ins > + INS_jalr ) // jal < beqz < bnez < jalr < beq/bne/blt/bltu/bge/bgeu + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins > INS_j && ins < INS_jalr) // jal < beqz < bnez < jalr < beq/bne/blt/bltu/bge/bgeu + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_jal || ins == INS_jalr); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JALR); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JALR) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + else + { + /* Backward jump */ + + /* Compute the distance estimate */ + + jmpDist = srcEncodingOffs - dstOffs; + + /* How much beyond the max. short distance does the jump go? 
*/ + + extra = jmpDist + nsd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[2] Jump block is at %08X\n", jmpIG->igOffs); + printf("[2] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[2] Jump source is at %08X\n", srcEncodingOffs); + printf("[2] Label block is at %08X\n", dstOffs); + printf("[2] Jump dist. is %04X\n", jmpDist); + if (extra > 0) + { + printf("[2] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_jal <= ins) && (ins <= INS_bgeu)); + + if (ins > + INS_jalr ) // jal < beqz < bnez < jalr < beq/bne/blt/bltu/bge/bgeu + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_jalr && ins > INS_j) // jal < beqz < bnez < jalr < beq/bne/blt/bltu/bge/bgeu + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_jal || ins == INS_jalr); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JALR); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JALR) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + } // end for each jump + + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + // indicating the instrDescJmp's size of the type INS_OPTS_J had updated + // after the first round and should iterate again to update. + isLinkingEnd_LA = 0x2; + + // Adjust offsets of any remaining blocks. 
+ for (; lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#ifdef DEBUG + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nLabels list after the jump dist binding:\n\n"); + emitDispIGlist(false); + } + + emitCheckIGList(); +#endif // DEBUG +} + +/***************************************************************************** + * + * Emit a 32-bit RISCV64 instruction + */ + +/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) +{ + assert(sizeof(code_t) == 4); + BYTE* dstRW = dst + writeableOffset; + *((code_t*)dstRW) = code; + + return sizeof(code_t); +} + +/***************************************************************************** +* + * Append the machine code corresponding to the given instruction descriptor + * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' + * is the instruction group that contains the instruction. Updates '*dp' to + * point past the generated code, and returns the size of the instruction + * descriptor in bytes. + */ + +size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) +{ + BYTE* dstRW = *dp + writeableOffset; + BYTE* dstRW2 = dstRW + 4; // addr for updating gc info if needed. + code_t code = 0; + instruction ins; + size_t sz; // = emitSizeOfInsDsc(id); + +#ifdef DEBUG +#if DUMP_GC_TABLES + bool dspOffs = emitComp->opts.dspGCtbls; +#else + bool dspOffs = !emitComp->opts.disDiffable; +#endif +#endif // DEBUG + + assert(REG_NA == (int)REG_NA); + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_RELOC: + { + regNumber reg1 = id->idReg1(); + + *(code_t*)dstRW = 0x00000017 | (code_t)(reg1 << 7); + + dstRW += 4; + +#ifdef DEBUG + code = emitInsCode(INS_auipc); + assert(code == 0x00000017); + code = emitInsCode(INS_addi); + assert(code == 0x00000013); + code = emitInsCode(INS_ld); + assert(code == 0x00003003); +#endif + + if (id->idIsCnsReloc()) + { + ins = INS_addi; + *(code_t*)dstRW = 0x00000013 | (code_t)(reg1 << 7) | (code_t)(reg1 << 15); + } + else + { + assert(id->idIsDspReloc()); + ins = INS_ld; + *(code_t*)dstRW = 0x00003003 | (code_t)(reg1 << 7) | (code_t)(reg1 << 15); + } + + dstRW += 4; + + emitRecordRelocation(dstRW - 8 - writeableOffset, id->idAddr()->iiaAddr, IMAGE_REL_RISCV64_PC); + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_I: + { + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + regNumber reg1 = id->idReg1(); + + switch (id->idCodeSize()) + { + case 8: + { + if (id->idReg2()) + { // special for INT64_MAX or UINT32_MAX; + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)REG_R0 << 15; + code |= 0xfff << 10; + + *(code_t*)dstRW = code; + dstRW += 4; + + ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; + code = emitInsCode(INS_srli); + code |= ((code_t)(reg1 << 7) | ((code_t)(reg1 << 15)) | (ui6 << 20)); + *(code_t*)dstRW = code; + } + else + { + code = emitInsCode(INS_lui); + code |= (code_t)(reg1 << 7); + code |= ((code_t)((imm + 0x800) >> 12) & 0xfffff) << 12; + + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)reg1 << 15; + code |= (code_t)(imm & 0xfff) << 20; + *(code_t*)dstRW = code; + } + break; + } + case 32: + { + ssize_t high = (imm >> 32) & 0xffffffff; + code = emitInsCode(INS_lui); + code |= (code_t)reg1 << 7; + code |= ((code_t)((high + 0x800) >> 12) & 0xfffff) << 12; + + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)reg1 << 15; + code |= (code_t)(high & 0xfff) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + ssize_t low = imm & 0xffffffff; + + code = emitInsCode(INS_slli); + code |= (code_t) reg1 << 7; + code |= (code_t) reg1 << 15; + code |= (code_t) 11 << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)reg1 << 15; + code |= (code_t)((low >> 21) & 0x7ff) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_slli); + code |= (code_t) reg1 << 7; + code |= (code_t) reg1 << 15; + code |= (code_t) 11 << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)reg1 << 15; + code |= (code_t)((low >> 10) & 0x7ff) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_slli); + code |= (code_t) reg1 << 7; + code |= (code_t) reg1 << 15; + code |= (code_t) 10 << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)reg1 << 15; + code |= (code_t)((low) & 0x3ff) << 20; + *(code_t*)dstRW = code; + break; + } + default: + unreached(); + break; + } + + ins = INS_addi; + dstRW += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_RC: + { + // Reference to JIT data + assert(id->idAddr()->iiaIsJitDataOffset()); + assert(id->idGCref() == GCT_NONE); + + int doff = id->idAddr()->iiaGetJitDataOffset(); + assert(doff >= 0); + + ssize_t imm = emitGetInsSC(id); + assert((imm >= 0) && (imm < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0. + + unsigned dataOffs = (unsigned)(doff + imm); + + assert(dataOffs < emitDataSize()); + + ins = id->idIns(); + regNumber reg1 = id->idReg1(); + + if (id->idIsReloc()) + { + // get the addr-offset of the data. + imm = (ssize_t)emitConsBlock - (ssize_t)(dstRW - writeableOffset) + dataOffs; + assert(imm > 0); + assert(!(imm & 3)); + + + doff = (int)(imm & 0xfff); + assert(isValidSimm20((imm + 0x800) >> 12)); + +#ifdef DEBUG + code = emitInsCode(INS_auipc); + assert(code == 0x00000017); +#endif + code = 0x00000017 | (1 << 7); // TODO R21 => RA + *(code_t*)dstRW = code | ((code_t)((imm + 0x800) & 0xfffff000)); + dstRW += 4; + + if (ins == INS_jal) + { + assert(isGeneralRegister(reg1)); + ins = INS_addi; +#ifdef DEBUG + code = emitInsCode(INS_addi); + assert(code == 0x00000013); +#endif + code = 0x00000013 | (1 << 15); + *(code_t*)dstRW = code | ((code_t)(reg1 & 0x1f) << 7) | (((code_t)doff & 0xfff) << 20); + } + else + { + code = emitInsCode(ins); + code |= (code_t)((reg1 & 0x1f) << 7); + code |= (code_t)REG_RA << 15; // NOTE:here must be REG_R21 !!! 
// TODO R21 => RA + code |= (code_t)(doff & 0xfff) << 20; + *(code_t*)dstRW = code; + } + dstRW += 4; + } + else + { + // get the addr of the data. + imm = (ssize_t)emitConsBlock + dataOffs; + + code = emitInsCode(INS_lui); + if (ins == INS_jal) + { + assert((imm >> 40) == 0); + + doff = imm & 0x7ff; + + UINT32 high = imm >> 11; + + code |= (code_t)REG_RA << 7; // TODO R21 => RA + code |= (code_t)(((high + 0x800) >> 12) << 12); + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)REG_RA << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)(high & 0xFFF) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_slli); + code |= (code_t)REG_RA << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)11 << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + ins = INS_addi; + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)doff << 20; + *(code_t*)dstRW = code; + dstRW += 4; + } + else + { + assert((imm >> 40) == 0); + + doff = imm & 0x7ff; + UINT32 high = imm >> 11; + + code |= (code_t)(REG_RA << 7); // TODO CHECK R21 => RA + code |= (code_t)(((high + 0x800) >> 12) << 12); + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)REG_RA << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)(high & 0xFFF) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_slli); + code |= (code_t)REG_RA << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)11 << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(ins); + code |= (code_t)(reg1 & 0x1f) << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)doff << 20; + *(code_t*)dstRW = code; + dstRW += 4; + } + } + + sz = sizeof(instrDesc); + } + break; + + case INS_OPTS_RL: + { + insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); + id->idAddr()->iiaIGlabel = tgtIG; + + regNumber reg1 = id->idReg1(); + assert(isGeneralRegister(reg1)); + + if (id->idIsReloc()) + { + ssize_t imm = (ssize_t)tgtIG->igOffs; + imm = (ssize_t)emitCodeBlock + imm - (ssize_t)(dstRW - writeableOffset); + assert((imm & 3) == 0); + + int doff = (int)(imm & 0xfff); + assert(isValidSimm20((imm + 0x800) >> 12)); + + code = 0x00000017; + *(code_t*)dstRW = code | (code_t)reg1 << 7 | ((imm + 0x800)& 0xfffff000); + dstRW += 4; +#ifdef DEBUG + code = emitInsCode(INS_auipc); + assert(code == 0x00000017); + code = emitInsCode(INS_addi); + assert(code == 0x00000013); +#endif + ins = INS_addi; + *(code_t*)dstRW = 0x00000013 | ((code_t)reg1 << 7) | ((code_t)reg1 << 15) | ((doff & 0xfff) << 20); + } + else + { + ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; + // assert((imm >> 32) == 0xff); + assert((imm >> (32 + 20)) == 0); + + code = emitInsCode(INS_lui); + code |= (code_t)REG_RA << 7; // TODO CHECK R21 => RA + code |= ((code_t)((imm + 0x800) >> 12) & 0xfffff) << 12; + + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)REG_RA << 7; + code |= (code_t)REG_RA << 15; + code |= (code_t)(imm & 0xfff) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_addi); + code |= (code_t)reg1 << 7; + code |= (((imm + 0x80000800) >> 32) & 0xfff) << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_slli); + code |= (code_t)reg1 << 7; + code |= (code_t)reg1 << 15; + code |= (code_t)32 << 20; + *(code_t*)dstRW = code; + dstRW += 4; + + ins = INS_add; + code = emitInsCode(INS_add); + code |= (code_t)reg1 << 7; + 
code |= (code_t)reg1 << 15; + code |= (code_t)REG_RA << 20; + *(code_t*)dstRW = code; + } + + dstRW += 4; + + sz = sizeof(instrDesc); + } + break; + case INS_OPTS_JALR: + { + instrDescJmp* jmp = (instrDescJmp*)id; + + regNumber reg1 = id->idReg1(); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); + imm -= 4; + + assert((imm & 0x3) == 0); + + ins = jmp->idIns(); + assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! + switch (jmp->idCodeSize()) + { + case 8: + { + regNumber reg2 = id->idReg2(); + // assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); + + if ((INS_beq == ins) || (INS_bne == ins)) + { + if ((-0x1000 <= imm) && (imm < 0x1000)) + { + code = emitInsCode(INS_xor); + code |= (code_t)REG_RA << 7; // TODO R21 => RA + code |= (code_t)reg1 << 15; + code |= (code_t)reg2 << 20; + + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(ins); + code |= (code_t)REG_RA << 15; // TODO R21 => RA + code |= ((imm >> 11) & 0x1) << 7; + code |= ((imm >> 1) & 0xf) << 8; + code |= ((imm >> 5) & 0x3f) << 25; + code |= ((imm >> 12) & 0x1) << 31; + *(code_t*)dstRW = code; + dstRW += 4; + } + else + { + assert((-0x100000 <= imm) && (imm < 0x100000)); + assert((INS_bne & 0xefff) == INS_beq); + + code = emitInsCode((instruction)((int)ins ^ 0x1000)); + code |= ((code_t)(reg1) /*& 0x1f */) << 15; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */) << 20; /* rd */ + code |= 0x8 << 7; + *(code_t*)dstRW = code; + dstRW += 4; + + + code = emitInsCode(INS_jal); + code |= ((imm >> 12) & 0xff) << 12; + code |= ((imm >> 11) & 0x1) << 20; + code |= ((imm >> 1) & 0x3ff) << 21; + code |= ((imm >> 20) & 0x1) << 31; + + *(code_t*)dstRW = code; + dstRW += 4; + } + } + else if ((INS_blt <= ins) && (ins <= INS_bgeu)) + { + assert((-0x100000 <= imm) && (imm < 0x100000)); + assert((INS_bge & 0xefff) == INS_blt); + assert((INS_bgeu & 0xefff) == INS_bltu); + + code = emitInsCode((instruction)((int)ins ^ 0x1000)); + code |= ((code_t)(reg1) /*& 0x1f */) << 15; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */) << 20; /* rd */ + code |= 0x8 << 7; + *(code_t*)dstRW = code; + dstRW += 4; + + code = emitInsCode(INS_jal); + code |= ((imm >> 12) & 0xff) << 12; + code |= ((imm >> 11) & 0x1) << 20; + code |= ((imm >> 1) & 0x3ff) << 21; + code |= ((imm >> 20) & 0x1) << 31; + *(code_t*)dstRW = code; + dstRW += 4; + } + break; + } + + default: + unreached(); + break; + } + } + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_J_cond: + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); + assert(!(imm & 1)); + + ins = id->idIns(); + code = emitInsCode(ins); + code |= ((code_t)id->idReg1()) << 15; + code |= ((code_t)id->idReg2()) << 20; + code |= ((imm >> 11) & 0x1) << 7; + code |= ((imm >> 1) & 0xf) << 8; + code |= ((imm >> 5) & 0x3f) << 25; + code |= ((imm >> 12) & 0x1) << 31; + *(code_t*)dstRW = code; + dstRW += 4; + + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_J: + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dstRW-relative. + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. 
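+ // The displacement below is scattered into the instruction word using the standard RISC-V + // formats: J-type (jal/j) places imm[20|10:1|11|19:12] in bits 31:12, B-type (beq/bne/blt/ + // bge/bltu/bgeu and beqz/bnez) places imm[12|10:5] in bits 31:25 and imm[4:1|11] in bits 11:7, + // and I-type (jalr) places imm[11:0] in bits 31:20.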
+ assert((imm & 3) == 0); + + ins = id->idIns(); + code = emitInsCode(ins); + if (ins == INS_jal) + { + code |= ((imm >> 12) & 0xff) << 12; + code |= ((imm >> 11) & 0x1) << 20; + code |= ((imm >> 1) & 0x3ff) << 21; + code |= ((imm >> 20) & 0x1) << 31; + code |= REG_RA << 7; + } + else if (ins == INS_j) + { + code |= ((imm >> 12) & 0xff) << 12; + code |= ((imm >> 11) & 0x1) << 20; + code |= ((imm >> 1) & 0x3ff) << 21; + code |= ((imm >> 20) & 0x1) << 31; + } + else if (ins == INS_jalr) + { + code |= ((code_t)(imm & 0xfff) << 20); + } + else if (ins == INS_bnez || ins == INS_beqz) + { + code |= (code_t)id->idReg1() << 15; + code |= ((imm >> 11) & 0x1) << 7; + code |= ((imm >> 1) & 0xf) << 8; + code |= ((imm >> 5) & 0x3f) << 25; + code |= ((imm >> 12) & 0x1) << 31; + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + code |= ((code_t)id->idReg1()) << 15; + code |= ((code_t)id->idReg2()) << 20; + code |= ((imm >> 1) & 0xf) << 8; + code |= ((imm >> 5) & 0x3f) << 25; + code |= ((imm >> 11) & 0x1) << 7; + code |= ((imm >> 12) & 0x1) << 31; + } + else + { + assert(!"unimplemented on RISCV64 yet"); + } + + *(code_t*)dstRW = code; + dstRW += 4; + + sz = sizeof(instrDescJmp); + } + break; + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + sz = sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + sz = sizeof(instrDesc); + } + dstRW += emitOutputCall(ig, *dp, id, 0); + + dstRW2 = dstRW; + ins = INS_nop; + break; + + // case INS_OPTS_NONE: + default: + *(code_t*)dstRW = id->idAddr()->iiaGetInstrEncode(); + dstRW += 4; + ins = id->idIns(); + sz = emitSizeOfInsDsc(id); + break; } + + // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. + // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a + // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as + // for stores, but we ignore those cases here.) + if (emitInsMayWriteToGCReg(ins)) // True if "id->idIns()" writes to a register than can hold GC ref. + { + // We assume that "idReg1" is the primary destination register for all instructions + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dstRW2 - writeableOffset); + } + else + { + emitGCregDeadUpd(id->idReg1(), dstRW2 - writeableOffset); + } + } + + // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC + // ref or overwritten one. + if (emitInsWritesToLclVarStackLoc(id) /*|| emitInsWritesToLclVarStackLocPair(id)*/) + { + int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); + unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); + bool FPbased; + int adr = emitComp->lvaFrameAddress(varNum, &FPbased); + if (id->idGCref() != GCT_NONE) + { + emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dstRW2 - writeableOffset DEBUG_ARG(varNum)); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum >= 0) + { + // "Regular" (non-spill-temp) local. 
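+ // A negative 'varNum' denotes a spill temp, which is resolved through the temp table + // (tmpFindNum) in the else branch below.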
+ vt = var_types(emitComp->lvaTable[varNum].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + emitGCvarDeadUpd(adr + ofs, dstRW2 - writeableOffset DEBUG_ARG(varNum)); + } + // if (emitInsWritesToLclVarStackLocPair(id)) + //{ + // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; + // if (id->idGCrefReg2() != GCT_NONE) + // { + // emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp); + // } + // else + // { + // // If the type of the local is a gc ref type, update the liveness. + // var_types vt; + // if (varNum >= 0) + // { + // // "Regular" (non-spill-temp) local. + // vt = var_types(emitComp->lvaTable[varNum].lvType); + // } + // else + // { + // TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + // vt = tmpDsc->tdTempType(); + // } + // if (vt == TYP_REF || vt == TYP_BYREF) + // emitGCvarDeadUpd(adr + ofs2, *dp); + // } + //} + } + +#ifdef DEBUG + /* Make sure we set the instruction descriptor size correctly */ + + // size_t expected = emitSizeOfInsDsc(id); + // assert(sz == expected); + + if (emitComp->opts.disAsm || emitComp->verbose) + { + code_t* cp = (code_t*)(*dp + writeableOffset); + while ((BYTE*)cp != dstRW) + { + emitDisInsName(*cp, (BYTE*)cp, id); + cp++; + } + } + + if (emitComp->compDebugBreak) + { + // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for + // emitting instruction a6, (i.e. IN00a6 in jitdump). + if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } +#endif + + /* All instructions are expected to generate code */ + + assert(*dp != (dstRW - writeableOffset)); + + *dp = dstRW - writeableOffset; + + return sz; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +#ifdef DEBUG + +// clang-format off +static const char* const RegNames[] = +{ + #define REGDEF(name, rnum, mask, sname) sname, + #include "register.h" +}; +// clang-format on + +//---------------------------------------------------------------------------------------- +// Disassemble the given instruction. +// The `emitter::emitDisInsName` is focused on the most important for debugging. +// So it implemented as far as simply and independently which is very useful for +// porting easily to the release mode. +// +// Arguments: +// code - The instruction's encoding. +// addr - The address of the code. +// id - The instrDesc of the code if needed. +// +// Note: +// The length of the instruction's name include aligned space is 13. 
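+// For example (illustrative only; the exact columns depend on disAddr/disOpcode and the +// printf formats below), a disassembled line may look like: +//   0xffff7a40010c FE010113 addi         sp, sp, -32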
+// + +void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) +{ + const BYTE* insAdr = addr - writeableOffset; + + unsigned int opcode = code & 0x7f; + assert((opcode & 0x3) == 0x3); + + bool disOpcode = !emitComp->opts.disDiffable; + bool disAddr = emitComp->opts.disAddr; + if (disAddr) + { + printf(" 0x%llx", insAdr); + } + + printf(" "); + + if (disOpcode) + { + printf("%08X ", code); + } + + switch (opcode) + { + case 0x37: // LUI + { + const char* rd = RegNames[(code >> 7) & 0x1f]; + int imm20 = (code >> 12) & 0xfffff; + if (imm20 & 0x80000) + { + imm20 |= 0xfff00000; + } + printf("lui %s, %d\n", rd, imm20); + return; + } + case 0x17: // AUIPC + { + const char* rd = RegNames[(code >> 7) & 0x1f]; + int imm20 = (code >> 12) & 0xfffff; + if (imm20 & 0x80000) + { + imm20 |= 0xfff00000; + } + printf("auipc %s, %d\n", rd, imm20); + return; + } + case 0x13: + { + unsigned int opcode2 = (code >> 12) & 0x7; + const char* rd = RegNames[(code >> 7) & 0x1f]; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + int imm12 = (((int)code) >> 20); // & 0xfff; + //if (imm12 & 0x800) + //{ + // imm12 |= 0xfffff000; + //} + switch (opcode2) + { + case 0x0: // ADDI + printf("addi %s, %s, %d\n", rd, rs1, imm12); + return; + case 0x1: // SLLI + printf("slli %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6 BITS for SHAMT in RISCV64 + return; + case 0x2: // SLTI + printf("slti %s, %s, %d\n", rd, rs1, imm12); + return; + case 0x3: // SLTIU + printf("sltiu %s, %s, %d\n", rd, rs1, imm12); + return; + case 0x4: // XORI + printf("xori %s, %s, 0x%x\n", rd, rs1, imm12); + return; + case 0x5: // SRLI & SRAI + if (((code >> 30) & 0x1) == 0) + { + printf("srli %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6BITS for SHAMT in RISCV64 + } + else + { + printf("srai %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6BITS for SHAMT in RISCV64 + } + return; + case 0x6: // ORI + printf("ori %s, %s, 0x%x\n", rd, rs1, imm12 & 0xfff); + return; + case 0x7: // ANDI + printf("andi %s, %s, 0x%x\n", rd, rs1, imm12 & 0xfff); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + + } + } + case 0x1b: + { + unsigned int opcode2 = (code >> 12) & 0x7; + const char* rd = RegNames[(code >> 7) & 0x1f]; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + int imm12 = (((int)code) >> 20); // & 0xfff; + //if (imm12 & 0x800) + //{ + // imm12 |= 0xfffff000; + //} + switch (opcode2) + { + case 0x0: // ADDIW + printf("addiw %s, %s, %d\n", rd, rs1, imm12); + return; + case 0x1: // SLLIW + printf("slliw %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6 BITS for SHAMT in RISCV64 + return; + case 0x5: // SRLIW & SRAIW + if (((code >> 30) & 0x1) == 0) + { + printf("srliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5BITS for SHAMT in RISCV64 + } + else + { + printf("sraiw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5BITS for SHAMT in RISCV64 + } + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + case 0x33: + { + unsigned int opcode2 = (code >> 25) & 0x3; + unsigned int opcode3 = (code >> 12) & 0x7; + const char* rd = RegNames[(code >> 7) & 0x1f]; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rs2 = RegNames[(code >> 20) & 0x1f]; + if (opcode2 == 0) + { + switch (opcode3) + { + case 0x0: // ADD & SUB + if (((code >> 30) & 0x1) == 0) + { + printf("add %s, %s, %s\n", rd, rs1, rs2); + } + else + { + printf("sub %s, %s, %s\n", rd, rs1, rs2); + } + return; + case 0x1: // SLL + printf("sll %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x2: // SLT + printf("slt %s, %s, 
%s\n", rd, rs1, rs2); + return; + case 0x3: // SLTU + printf("sltu %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x4: // XOR + printf("xor %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x5: // SRL & SRA + if (((code >> 30) & 0x1) == 0) + { + printf("srl %s, %s, %s\n", rd, rs1, rs2); + } + else + { + printf("sra %s, %s, %s\n", rd, rs1, rs2); + } + return; + case 0x6: // OR + printf("or %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x7: // AND + printf("and %s, %s, %s\n", rd, rs1, rs2); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + + } + } + else if (opcode2 == 0x1) + { + switch (opcode3) + { + case 0x0: // MUL + printf("mul %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x1: // MULH + printf("mulh %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x2: // MULHSU + printf("mulhsu %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x3: // MULHU + printf("mulhu %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x4: // DIV + printf("div %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x5: // DIVU + printf("divu %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x6: // REM + printf("rem %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x7: // REMU + printf("remu %s, %s, %s\n", rd, rs1, rs2); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + + } + } + else + { + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + case 0x3b: + { + unsigned int opcode2 = (code >> 25) & 0x3; + unsigned int opcode3 = (code >> 12) & 0x7; + const char* rd = RegNames[(code >> 7) & 0x1f]; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rs2 = RegNames[(code >> 20) & 0x1f]; + + if (opcode2 == 0) + { + switch (opcode3) + { + case 0x0: // ADDW & SUBW + if (((code >> 30) & 0x1) == 0) + { + printf("addw %s, %s, %s\n", rd, rs1, rs2); + } + else + { + printf("subw %s, %s, %s\n", rd, rs1, rs2); + } + return; + case 0x1: // SLLW + printf("sllw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x5: // SRLW & SRAW + if (((code >> 30) & 0x1) == 0) + { + printf("srlw %s, %s, %s\n", rd, rs1, rs2); + } + else + { + printf("sraw %s, %s, %s\n", rd, rs1, rs2); + } + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + else if (opcode2 == 1) + { + switch (opcode3) + { + case 0x0: // MULW + printf("mulw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x4: // DIVW + printf("divw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x5: // DIVUW + printf("divuw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x6: // REMW + printf("remw %s, %s, %s\n", rd, rs1, rs2); + return; + case 0x7: // REMUW + printf("remuw %s, %s, %s\n", rd, rs1, rs2); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + else + { + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + case 0x23: + { + unsigned int opcode2 = (code >> 12) & 0x7; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rs2 = RegNames[(code >> 20) & 0x1f]; + int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f); + if (offset & 0x800) + { + offset |= 0xfffff000; + } + + switch(opcode2) + { + case 0: // SB + printf("sb %s, %d(%s)\n", rs2, offset, rs1); + return; + case 1: // SH + printf("sh %s, %d(%s)\n", rs2, offset, rs1); + return; + case 2: // SW + printf("sw %s, %d(%s)\n", rs2, offset, rs1); + return; + case 3: // SD + printf("sd %s, %d(%s)\n", rs2, offset, rs1); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + case 
0x63: + { + unsigned int opcode2 = (code >> 12) & 0x7; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rs2 = RegNames[(code >> 20) & 0x1f]; + int offset = (((code >> 31) & 0x1) << 12) | + (((code >> 7) & 0x1) << 11) | + (((code >> 25) & 0x3f) << 5) | + (((code >> 8) & 0xf) << 1); + if (offset & 0x800) + { + offset |= 0xfffff000; + } + switch (opcode2) + { + case 0: // BEQ + printf("beq %s, %s, %d\n", rs1, rs2, offset); + return; + case 1: // BNE + printf("bne %s, %s, %d\n", rs1, rs2, offset); + return; + case 4: // BLT + printf("blt %s, %s, %d\n", rs1, rs2, offset); + return; + case 5: // BGE + printf("bge %s, %s, %d\n", rs1, rs2, offset); + return; + case 6: // BLTU + printf("bltu %s, %s, %d\n", rs1, rs2, offset); + return; + case 7: // BGEU + printf("bgeu %s, %s, %d\n", rs1, rs2, offset); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + } + case 0x03: + { + unsigned int opcode2 = (code >> 12) & 0x7; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rd = RegNames[(code >> 7) & 0x1f]; + int offset = ((code >> 20) & 0xfff); + if (offset & 0x800) + { + offset |= 0xfffff000; + } + + switch(opcode2) + { + case 0: // LB + printf("lb %s, %d(%s)\n", rd, offset, rs1); + return; + case 1: // LH + printf("lh %s, %d(%s)\n", rd, offset, rs1); + return; + case 2: // LW + printf("lw %s, %d(%s)\n", rd, offset, rs1); + return; + case 3: // LD + printf("ld %s, %d(%s)\n", rd, offset, rs1); + return; + case 4: // LBU + printf("lbu %s, %d(%s)\n", rd, offset, rs1); + return; + case 5: // LHU + printf("lhu %s, %d(%s)\n", rd, offset, rs1); + return; + case 6: // LWU + printf("lwu %s, %d(%s)\n", rd, offset, rs1); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + return; + } + + } + case 0x67: + { + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rd = RegNames[(code >> 7) & 0x1f]; + int offset = ((code >> 20) & 0xfff); + if (offset & 0x800) + { + offset |= 0xfffff000; + } + printf("jalr %s, %d(%s)\n", rd, offset, rs1); + return; + } + case 0x6f: + { + const char* rd = RegNames[(code >> 7) & 0x1f]; + int offset = (((code >> 31) & 0x1) << 20) | + (((code >> 12) & 0xff) << 12) | + (((code >> 20) & 0x1) << 11) | + (((code >> 21) & 0x3ff) << 1); + if (offset & 0x80000) + { + offset |= 0xfff00000; + } + printf("jal %s, %d\n", rd, offset); + return; + } + case 0x0f: + { + int pred = ((code) >> 24) & 0xf; + int succ = ((code) >> 20) & 0xf; + printf("fence %d, %d\n", pred, succ); + return; + } + case 0x73: + { + if (code == emitInsCode(INS_ebreak)) + { + printf("ebreak\n"); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + } + case 0x53: + { + unsigned int opcode2 = (code >> 25) & 0x7f; + unsigned int opcode3 = (code >> 20) & 0x1f; + unsigned int opcode4 = (code >> 12) & 0x7; + const char* fd = RegNames[((code >> 7) & 0x1f) | 0x20]; + const char* fs1 = RegNames[((code >> 15) & 0x1f) | 0x20]; + const char* fs2 = RegNames[((code >> 20) & 0x1f) | 0x20]; + + const char* xd = RegNames[(code >> 7) & 0x1f]; + const char* xs1 = RegNames[(code >> 15) & 0x1f]; + const char* xs2 = RegNames[(code >> 20) & 0x1f]; + + switch(opcode2) + { + case 0x00: // FADD.S + printf("fadd.s %s, %s, %s\n", fd, fs1, fs2); + return; + case 0x04: // FSUB.S + printf("fsub.s %s, %s, %s\n", fd, fs1, fs2); + return; + case 0x08: // FMUL.S + printf("fmul.s %s, %s, %s\n", fd, fs1, fs2); + return; + case 0x0C: // FDIV.S + printf("fdiv.s %s, %s, %s\n", fd, fs1, fs2); + 
return; + case 0x2C: // FSQRT.S + printf("fsqrt.s %s, %s\n", fd, fs1); + return; + case 0x10: // FSGNJ.S & FSGNJN.S & FSGNJX.S + if (opcode4 == 0) // FSGNJ.S + { + printf("fsgnj.s %s, %s, %s\n", fd, fs1, fs2); + } + else if (opcode4 == 1) // FSGNJN.S + { + printf("fsgnjn.s %s, %s, %s\n", fd, fs1, fs2); + } + else if (opcode4 == 2) // FSGNJX.S + { + printf("fsgnjx.s %s, %s, %s\n", fd, fs1, fs2); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x14: // FMIN.S & FMAX.S + if (opcode4 == 0) // FMIN.S + { + printf("fmin.s %s, %s, %s\n", fd, fs1, fs2); + } + else if (opcode4 == 1) // FMAX.S + { + printf("fmax.s %s, %s, %s\n", fd, fs1, fs2); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x60: // FCVT.W.S & FCVT.WU.S & FCVT.L.S & FCVT.LU.S + if (opcode3 == 0) // FCVT.W.S + { + printf("fcvt.w.s %s, %s\n", xd, fs1); + } + else if (opcode3 == 1) // FCVT.WU.S + { + printf("fcvt.wu.s %s, %s\n", xd, fs1); + } + else if (opcode3 == 2) // FCVT.L.S + { + printf("fcvt.l.s %s, %s\n", xd, fs1); + } + else if (opcode3 == 3) // FCVT.LU.S + { + printf("fcvt.lu.s %s, %s\n", xd, fs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x70: // FMV.X.W & FCLASS.S + if (opcode4 == 0) // FMV.X.W + { + printf("fmv.x.w %s, %s\n", xd, xs1); + } + else if (opcode4 == 1) // FCLASS.S + { + printf("fclass.s %s, %s\n", xd, fs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x50: // FLE.S & FLT.S & FEQ.S + if (opcode4 == 0) // FLE.S + { + printf("fle.s %s, %s, %s\n", xd, fs1, fs2); + } + else if (opcode4 == 1) // FLT.S + { + printf("flt.s %s, %s, %s\n", xd, fs1, fs2); + } + else if (opcode4 == 2) // FEQ.S + { + printf("feq.s %s, %s, %s\n", fd, xs1, fs2); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x68: // FCVT.S.W & FCVT.S.WU & FCVT.S.L & FCVT.S.LU + if (opcode3 == 0) // FCVT.S.W + { + printf("fcvt.s.w %s, %s\n", fd, xs1); + } + else if (opcode3 == 1) // FCVT.S.WU + { + printf("fcvt.s.wu %s, %s\n", fd, xs1); + } + else if (opcode3 == 2) // FCVT.S.L + { + printf("fcvt.s.l %s, %s\n", fd, xs1); + } + else if (opcode3 == 3) // FCVT.S.LU + { + printf("fcvt.s.lu %s, %s\n", fd, xs1); + } + + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x78: // FMV.W.X + printf("fmv.w.x %s, %s\n", fd, xs1); + return; + case 0x1: // FADD.D + printf("fadd.d %s, %s, %s\n", fd, fs1, fs2); + return; + case 0x5: // FSUB.D + printf("fsub.d %s, %s, %s\n", fd, fs1, fs2); + return; + case 0x9: // FMUL.D + printf("fmul.d %s, %s, %s\n", fd, fs1, fs2); + return; + case 0xd: // FDIV.D + printf("fdiv.d %s, %s, %s\n", fd, fs1, fs2); + return; + case 0x2d: // FSQRT.D + printf("fsqrt.d %s, %s\n", fd, fs1); + return; + case 0x11: // FSGNJ.D & FSGNJN.D & FSGNJX.D + if (opcode4 == 0) // FSGNJ.D + { + printf("fsgnj.d %s, %s, %s\n", fd, fs1, fs2); + } + else if (opcode4 == 1) // FSGNJN.D + { + printf("fsgnjn.d %s, %s, %s\n", fd, fs1, fs2); + } + else if (opcode4 == 2) // FSGNJX.D + { + printf("fsgnjx.d %s, %s, %s\n", fd, fs1, fs2); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x15: // FMIN.D & FMAX.D + if (opcode4 == 0) // FMIN.D 
+ { + printf("fmin.d %s, %s, %s\n", fd, fs1, fs2); + } + else if (opcode4 == 1) // FMAX.D + { + printf("fmax.d %s, %s, %s\n", fd, fs1, fs2); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x20: // FCVT.S.D + if (opcode3 == 1) // FCVT.S.D + { + printf("fcvt.s.d %s, %s\n", fd, fs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x21: // FCVT.D.S + if (opcode4 == 1) // FCVT.D.S + { + printf("fcvt.d.s %s, %s\n", fd, fs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x51: // FLE.D & FLT.D & FEQ.D + if (opcode4 == 0) // FLE.D + { + printf("fle.d %s, %s, %s\n", xd, fs1, fs2); + } + else if (opcode4 == 1) // FLT.D + { + printf("flt.d %s, %s, %s\n", xd, fs1, fs2); + } + else if (opcode4 == 2) // FEQ.D + { + printf("feq.d %s, %s, %s\n", xd, fs1, fs2); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x61: // FCVT.W.D & FCVT.WU.D & FCVT.L.D & FCVT.LU.D + + if (opcode3 == 0) // FCVT.W.D + { + printf("fcvt.w.d %s, %s\n", xd, fs1); + } + if (opcode3 == 1) // FCVT.WU.D + { + printf("fcvt.wu.d %s, %s\n", xd, fs1); + } + else if (opcode3 == 2) // FCVT.L.D + { + printf("fcvt.l.d %s, %s\n", xd, fs1); + } + else if (opcode3 == 3) // FCVT.LU.D + { + printf("fcvt.lu.d %s, %s\n", xd, fs1); + } + + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x69: // FCVT.D.W & FCVT.D.WU & FCVT.D.L & FCVT.D.LU + if (opcode3 == 0) // FCVT.D.W + { + printf("fcvt.d.w %s, %s\n", fd, xs1); + } + else if (opcode3 == 1) // FCVT.D.WU + { + printf("fcvt.d.wu %s, %s\n", fd, xs1); + } + else if (opcode3 == 2) + { + printf("fcvt.d.l %s, %s\n", fd, xs1); + } + else if (opcode3 == 3) + { + printf("fcvt.d.lu %s, %s\n", fd, xs1); + } + + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + + return; + case 0x71: // FMV.X.D & FCLASS.D + if (opcode4 == 0) // FMV.X.D + { + printf("fmv.x.d %s, %s\n", xd, fs1); + } + else if (opcode4 == 1) // FCLASS.D + { + printf("fclass.d %s, %s\n", xd, fs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + case 0x79: // FMV.D.X + assert(opcode4 == 0); + printf("fmv.d.x %s, %s\n", fd, xs1); + return; + default: + printf("RISCV64 illegal instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + return; + } + return; + } + case 0x27: + { + unsigned int opcode2 = (code >> 12) & 0x7; + + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rs2 = RegNames[((code >> 20) & 0x1f) | 0x20]; + int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f); + if (offset & 0x800) + { + offset |= 0xfffff000; + } + if (opcode2 == 2) // FSW + { + printf("fsw %s, %d(%s)\n", rs2, offset, rs1); + } + else if (opcode2 == 3) // FSD + { + printf("fsd %s, %d(%s)\n", rs2, offset, rs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + } + case 0x7: + { + unsigned int opcode2 = (code >> 12) & 0x7; + const char* rs1 = RegNames[(code >> 15) & 0x1f]; + const char* rd = RegNames[((code >> 7) & 0x1f) | 0x20]; + int offset = ((code >> 20) & 0xfff); + if (offset & 0x800) + { + offset |= 0xfffff000; + } + if (opcode2 == 2) // FLW + { + printf("flw 
%s, %d(%s)\n", rd, offset, rs1); + } + else if (opcode2 == 3) // FLD + { + printf("fld %s, %d(%s)\n", rd, offset, rs1); + } + else + { + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + return; + } + default: + printf("Not implemented instruction: 0x%08X\n", code); + _ASSERTE(!"TODO RISCV64 NYI"); + } + + _ASSERTE(!"TODO RISCV64 NYI"); +} + +/***************************************************************************** + * + * Display (optionally) the instruction encoding in hex + */ + +void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +{ + // We do not display the instruction hex if we want diff-able disassembly + if (!emitComp->opts.disDiffable) + { + if (sz == 4) + { + printf(" %08X ", (*((code_t*)code))); + } + else + { + assert(sz == 0); + printf(" "); + } + } +} + +void emitter::emitDispIns( + instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{ + // RISCV64 implements this similar by `emitter::emitDisInsName`. + // For RISCV64 maybe the `emitDispIns` is over complicate. + // The `emitter::emitDisInsName` is focused on the most important for debugging. + NYI_RISCV64("RISCV64 not used the emitter::emitDispIns"); +} + +/***************************************************************************** + * + * Display a stack frame reference. + */ + +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +#endif // DEBUG + +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*sccale + offset] +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +{ + GenTree* addr = indir->Addr(); + + if (addr->isContained()) + { + assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = addr->AsAddrMode()->Offset(); + if (addr->AsAddrMode()->gtScale > 0) + { + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); + } + } + + GenTree* memBase = indir->Base(); + emitAttr addType = varTypeIsGC(memBase) ? 
EA_BYREF : EA_PTRSIZE; + + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); + + if (offset != 0) + { + regNumber tmpReg = indir->GetSingleTempReg(); + + if (isValidSimm12(offset)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_I(INS_slli, addType, tmpReg, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), tmpReg); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); + } + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); + } + else // large offset + { + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, + offset); // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, memBase->GetRegNum()); + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->GetRegNum()); + + regNumber scaleReg = indir->GetSingleTempReg(); + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_I(INS_slli, addType, scaleReg, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, scaleReg); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + else // (offset == 0) + { + // Then load/store dataReg from/to [memBase + index] + switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) + { + case EA_1BYTE: + assert(((ins <= INS_lhu) && (ins >= INS_lb)) || ins == INS_lwu || ins == INS_ld || ((ins <= INS_sw) && (ins >= INS_sb)) || ins == INS_sd); + if (ins <= INS_lhu || ins == INS_lwu || ins == INS_ld) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_lbu; + else + ins = INS_lb; + } + else + ins = INS_sb; + break; + case EA_2BYTE: + assert(((ins <= INS_lhu) && (ins >= INS_lb)) || ins == INS_lwu || ins == INS_ld || ((ins <= INS_sw) && (ins >= INS_sb)) || ins == INS_sd); + if (ins <= INS_lhu || ins == INS_lwu || ins == INS_ld) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_lhu; + else + ins = INS_lh; + } + else + ins = INS_sh; + break; + case EA_4BYTE: + assert(((ins <= INS_lhu) && (ins >= INS_lb)) || ins == INS_lwu || ins == INS_ld || ((ins <= INS_sw) && (ins >= INS_sb)) || ins == INS_sd || ins == INS_fsw || ins == INS_flw); + assert(INS_fsw > INS_sd); + if (ins <= INS_lhu || ins == INS_lwu || ins == INS_ld) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_lwu; + else + ins = INS_lw; + } + else if (ins != INS_flw && ins != INS_fsw) + ins = INS_sw; + break; + case EA_8BYTE: + assert(((ins <= INS_lhu) && (ins >= INS_lb)) || + ins == INS_lwu || ins == INS_ld || + ((ins <= INS_sw) && (ins >= INS_sb)) || ins == INS_sd || ins == INS_fld || ins == INS_fsd); + assert(INS_fsd > INS_sd); + if (ins <= INS_lhu || ins == INS_lwu || ins == INS_ld) + { + ins = INS_ld; + } + else if (ins != INS_fld && ins != INS_fsd) + ins = INS_sd; + break; + default: + assert(!"------------TODO for RISCV64: unsupported ins."); + } + + // TODO REG21 => REG_RA + if (lsl > 0) + { + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_I(INS_slli, emitActualTypeSize(index->TypeGet()), REG_RA, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add, addType, REG_RA, memBase->GetRegNum(), REG_RA); + emitIns_R_R_I(ins, attr, dataReg, REG_RA, 0); + } + else // no scale + { + 
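+ // No scale: compute base + index into the scratch register (REG_RA here), then issue the + // load/store at displacement 0 from it.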
emitIns_R_R_R(INS_add, addType, REG_RA, memBase->GetRegNum(), index->GetRegNum()); + emitIns_R_R_I(ins, attr, dataReg, REG_RA, 0); + } + } + } + else // no Index register + { + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Get a temp integer register to compute long address. + regNumber addrReg = indir->GetSingleTempReg(); + + emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + unsigned offset = varNode->GetLclOffs(); + if (emitInsIsStore(ins)) + { + emitIns_S_R(ins, attr, dataReg, REG_NA, lclNum, offset); + } + else + { + emitIns_R_S(ins, attr, dataReg, lclNum, offset); + } + } + else if (isValidSimm12(offset)) + { + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = indir->GetSingleTempReg(); + + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), tmpReg); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + } + else // addr is not contained, so we evaluate it into a register + { +#ifdef DEBUG + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `st a0,[a1]` for byref `a1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. + LclVarDsc* varDsc = emitComp->lvaGetDesc(addr->AsLclVarCommon()); + assert(!varDsc->lvTracked); + } +#endif // DEBUG + + // Then load/store dataReg from/to [addrReg] + emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); + } +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. + +regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +{ + NYI_RISCV64("emitInsBinary-----unused"); + return REG_R0; +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + // dst can only be a reg + assert(!dst->isContained()); + + // find immed (if any) - it cannot be a dst + // Only one src can be an int. 
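+ // When a contained integer constant is found it is encoded as an immediate operand below; + // the remaining operand ('nonIntReg') must already live in a register.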
+ GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; + + bool needCheckOv = dst->gtOverflowEx(); + + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); + + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + + if (needCheckOv) + { + if (ins == INS_add) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_addw) // || ins == INS_add + { + assert(attr == EA_4BYTE); + } + else if (ins == INS_addi) + { + assert(intConst != nullptr); + } + else if (ins == INS_addiw) + { + assert(intConst != nullptr); + } + else if (ins == INS_sub) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_subw) + { + assert(attr == EA_4BYTE); + } + else if ((ins == INS_mul) || (ins == INS_mulh) || (ins == INS_mulhu)) + { + assert(attr == EA_8BYTE); + // NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else if (ins == INS_mulw) + { + assert(attr == EA_4BYTE); + // NOTE: overflow format doesn't support an int constant operand directly. 
+ assert(intConst == nullptr); + } + else + { +#ifdef DEBUG + printf("RISCV64-Invalid ins for overflow check: %s\n", codeGen->genInsName(ins)); +#endif + assert(!"Invalid ins for overflow check"); + } + } + + if (intConst != nullptr) + { + ssize_t imm = intConst->IconValue(); + if (ins == INS_andi || ins == INS_ori || ins == INS_xori) + { + assert(isValidSimm12(imm)); + } + else + { + assert(isValidSimm12(imm)); + } + + if (ins == INS_sub) + { + assert(attr == EA_8BYTE); + assert(imm != -2048); + ins = INS_addi; + imm = -imm; + } + else if (ins == INS_subw) + { + assert(attr == EA_4BYTE); + assert(imm != -2048); + ins = INS_addiw; + imm = -imm; + } + + assert(ins == INS_addi || ins == INS_addiw || ins == INS_andi || ins == INS_ori || ins == INS_xori); + + if (needCheckOv) + { + emitIns_R_R_R(INS_or, attr, REG_RA, nonIntReg->GetRegNum(), REG_R0); // TODO R21 => RA + } + + emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), imm); + + if (needCheckOv) + { + if (ins == INS_addi || ins == INS_addiw) + { + // A = B + C + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, dst->GetRegNum(), nullptr, REG_RA); // TODO R21 => RA + } + else + { + if (imm > 0) + { + // B > 0 and C > 0, if A < B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R0, REG_RA); // TODO R21 => RA + emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, dst->GetRegNum(), imm); // TODO R21 => RA + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_RA); // TODO R21 => RA + + codeGen->genDefineTempLabel(tmpLabel); + } + else if (imm < 0) + { + // B < 0 and C < 0, if A > B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_RA, REG_R0); // TODO R21 => RA + emitIns_R_R_I(INS_addi, attr, REG_RA, REG_R0, imm); // TODO R21 => RA + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, REG_RA, nullptr, dst->GetRegNum()); // TODO R21 => RA + + codeGen->genDefineTempLabel(tmpLabel); + } + } + } + else + { + assert(!"unimplemented on RISCV64 yet"); + } + } + } + else if (varTypeIsFloating(dst)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else if (dst->OperGet() == GT_MUL) + { + if (!needCheckOv && !(dst->gtFlags & GTF_UNSIGNED)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else + { + if (needCheckOv) + { + assert(REG_RA != dst->GetRegNum()); + assert(REG_RA != src1->GetRegNum()); + assert(REG_RA != src2->GetRegNum()); + + assert(REG_T6 != dst->GetRegNum()); + assert(REG_T6 != src1->GetRegNum()); + assert(REG_T6 != src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + { + emitIns_R_R_I(INS_slli, EA_8BYTE, REG_RA, src1->GetRegNum(), 32); + emitIns_R_R_I(INS_slli, EA_8BYTE, REG_T6, src2->GetRegNum(), 32); + emitIns_R_R_R(INS_mulhu, EA_8BYTE, REG_RA, REG_RA, REG_T6); + emitIns_R_R_I(INS_srai, attr, REG_RA, REG_RA, 32); + } + else + { + emitIns_R_R_R(INS_mulhu, attr, REG_RA, src1->GetRegNum(), src2->GetRegNum()); + } + } + else + { + if (attr == EA_4BYTE) + { + emitIns_R_R_R(INS_mul, EA_8BYTE, REG_RA, src1->GetRegNum(), src2->GetRegNum()); + emitIns_R_R_I(INS_srai, attr, REG_RA, REG_RA, 32); + } + else + { + emitIns_R_R_R(INS_mulhu, attr, REG_RA, src1->GetRegNum(), src2->GetRegNum()); + } + } + } + + // n * n bytes will store n bytes result + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), 
src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + { + emitIns_R_R_I(INS_slli, EA_8BYTE, dst->GetRegNum(), dst->GetRegNum(), 32); + emitIns_R_R_I(INS_srli, EA_8BYTE, dst->GetRegNum(), dst->GetRegNum(), 32); + } + } + + if (needCheckOv) + { + assert(REG_RA != dst->GetRegNum()); + assert(REG_RA != src1->GetRegNum()); + assert(REG_RA != src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_RA); + } + else + { + regNumber tmpReg = dst->GetSingleTempReg(); + assert(tmpReg != dst->GetRegNum()); + assert(tmpReg != src1->GetRegNum()); + assert(tmpReg != src2->GetRegNum()); + size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai : INS_sraiw, attr, tmpReg, dst->GetRegNum(), + imm); + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_RA, nullptr, tmpReg); + } + } + } + } + else if (dst->OperIs(GT_AND, GT_AND_NOT, GT_OR, GT_XOR)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + // TODO-RISCV64-CQ: here sign-extend dst when deal with 32bit data is too conservative. + if (EA_SIZE(attr) == EA_4BYTE) + emitIns_R_R_I(INS_slliw, attr, dst->GetRegNum(), dst->GetRegNum(), 0); + } + else + { + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); + regNumber saveOperReg1 = REG_NA; + regNumber saveOperReg2 = REG_NA; + + if ((dst->gtFlags & GTF_UNSIGNED) && (attr == EA_8BYTE)) + { + if (src1->gtType == TYP_INT) + { + emitIns_R_R_I(INS_slli, EA_8BYTE, regOp1, regOp1, 32); + emitIns_R_R_I(INS_srli, EA_8BYTE, regOp1, regOp1, 32); + } + if (src2->gtType == TYP_INT) + { + emitIns_R_R_I(INS_slli, EA_8BYTE, regOp2, regOp2, 32); + emitIns_R_R_I(INS_srli, EA_8BYTE, regOp2, regOp2, 32); + } + } + if (needCheckOv) + { + assert(!varTypeIsFloating(dst)); + + assert(REG_RA != dst->GetRegNum()); + + if (dst->GetRegNum() == regOp1) + { + assert(REG_RA != regOp1); + saveOperReg1 = REG_RA; + saveOperReg2 = regOp2; + emitIns_R_R_I(INS_addi, attr, REG_RA, regOp1, 0); + } + else if (dst->GetRegNum() == regOp2) + { + assert(REG_RA != regOp2); + saveOperReg1 = regOp1; + saveOperReg2 = REG_RA; + emitIns_R_R_I(INS_addi, attr, REG_RA, regOp2, 0); + } + else + { + saveOperReg1 = regOp1; + saveOperReg2 = regOp2; + } + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), regOp1, regOp2); + + if (needCheckOv) + { + if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) + { + ssize_t imm; + regNumber tempReg1; + regNumber tempReg2; + // ADD : A = B + C + // SUB : C = A - B + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + // if A < B, goto overflow + if (dst->OperGet() == GT_ADD) + { + tempReg1 = dst->GetRegNum(); + tempReg2 = saveOperReg1; + } + else + { + tempReg1 = saveOperReg1; + tempReg2 = saveOperReg2; + } + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2); + } + else + { + tempReg1 = REG_T2; // TODO CHECK REG_RA to REG_T2 // src1->GetSingleTempReg(); + tempReg2 = codeGen->rsGetRsvdReg(); + assert(tempReg1 != tempReg2); + assert(tempReg1 != saveOperReg1); + assert(tempReg2 != saveOperReg2); + + ssize_t ui6 = (attr == EA_4BYTE) ? 
31 : 63; + if (dst->OperGet() == GT_ADD) + emitIns_R_R_I(INS_srli, attr, tempReg1, saveOperReg1, ui6); + else + emitIns_R_R_I(INS_srli, attr, tempReg1, dst->GetRegNum(), ui6); + emitIns_R_R_I(INS_srli, attr, tempReg2, saveOperReg2, ui6); + + emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2); + if (attr == EA_4BYTE) + { + imm = 1; + emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm); + emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); + } + // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); + + emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0); + + emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); + + // B > 0 and C > 0, if A < B, goto overflow + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, + dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + + codeGen->genDefineTempLabel(tmpLabel2); + + codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW); + + codeGen->genDefineTempLabel(tmpLabel3); + + // B < 0 and C < 0, if A > B, goto overflow + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, + dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + else + { +#ifdef DEBUG + printf("---------[RISCV64]-NOTE: UnsignedOverflow instruction %d\n", ins); +#endif + assert(!"unimplemented on RISCV64 yet"); + } + } + } + + return dst->GetRegNum(); +} + +unsigned emitter::get_curTotalCodeSize() +{ + return emitTotalCodeSize; +} + +#if defined(DEBUG) || defined(LATE_DISASM) + +//---------------------------------------------------------------------------------------- +// getInsExecutionCharacteristics: +// Returns the current instruction execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// +// Return Value: +// A struct containing the current instruction execution characteristics +// +// Notes: +// The instruction latencies and throughput values returned by this function +// are NOT accurate and just a function feature. +emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +{ + insExecutionCharacteristics result; + + // TODO-RISCV64: support this function. + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; + + return result; +} + +#endif // defined(DEBUG) || defined(LATE_DISASM) + +#ifdef DEBUG +//------------------------------------------------------------------------ +// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. +// +// TODO-RISCV64: supporting SIMD. +// Arguments: +// reg - A general-purpose register orfloating-point register. +// size - unused parameter. +// varName - unused parameter. +// +// Return value: +// A string that represents a general-purpose register name or floating-point scalar register name. 
+// +const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) +{ + assert(reg < REG_COUNT); + + const char* rn = nullptr; + + rn = RegNames[reg]; + assert(rn != nullptr); + + return rn; +} +#endif + +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a given instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + return false; +} + +#endif // defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h new file mode 100644 index 00000000000000..1a29c1598a48e2 --- /dev/null +++ b/src/coreclr/jit/emitriscv64.h @@ -0,0 +1,244 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#if defined(TARGET_RISCV64) + +// The RISCV64 instructions are all 32 bits in size. +// We use an unsigned int to hold the encoded instructions. +// This typedef defines the type that we use to hold encoded instructions. +// +typedef unsigned int code_t; + +/************************************************************************/ +/* Routines that compute the size of / encode instructions */ +/************************************************************************/ + +struct CnsVal +{ + ssize_t cnsVal; + bool cnsReloc; +}; + +#ifdef DEBUG + +/************************************************************************/ +/* Debug-only routines to display instructions */ +/************************************************************************/ + +const char* emitFPregName(unsigned reg, bool varName = true); +const char* emitVectorRegName(regNumber reg); + +void emitDisInsName(code_t code, const BYTE* addr, instrDesc* id); +#endif // DEBUG + +void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); +void emitIns_I_la(emitAttr attr, regNumber reg, ssize_t imm); + +/************************************************************************/ +/* Private members that deal with target-dependent instr. descriptors */ +/************************************************************************/ + +private: +instrDesc* emitNewInstrCallDir(int argCnt, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +/************************************************************************/ +/* Private helpers for instruction output */ +/************************************************************************/ + +private: +bool emitInsIsLoad(instruction ins); +bool emitInsIsStore(instruction ins); +bool emitInsIsLoadOrStore(instruction ins); + +emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); + +// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] +void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); + +// Emit the 32-bit RISCV64 instruction 'code' into the 'dst' buffer +unsigned emitOutput_Instr(BYTE* dst, code_t code); + +// Method to check if a mov is redundant with respect to the last instruction. 
+// If yes, the caller of this method can choose to omit current mov instruction. +static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); +bool IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. + +/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -(((int)1) << 11) <= value && value < (((int)1) << 11); +}; + +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. +static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +} + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +} + +// Returns true if 'value' is a legal signed immediate 20 bit encoding. +static bool isValidSimm20(ssize_t value) +{ + return -(((int)1) << 19) <= value && value < (((int)1) << 19); +}; + +// Returns true if 'value' is a legal signed immediate 32 bit encoding. +static bool isValidSimm32(ssize_t value) +{ + return -(((ssize_t)1) << 31) <= value && value < (((ssize_t)1) << 31); +}; + +// Returns the number of bits used by the given 'size'. +inline static unsigned getBitWidth(emitAttr size) +{ + assert(size <= EA_8BYTE); + return (unsigned)size * BITS_PER_BYTE; +} + +inline static bool isGeneralRegister(regNumber reg) +{ + return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +inline static bool isGeneralRegisterOrR0(regNumber reg) +{ + return (reg >= REG_FIRST) && (reg <= REG_INT_LAST); +} // Includes REG_R0 + +inline static bool isFloatReg(regNumber reg) +{ + return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); +} + +/************************************************************************/ +/* Output target-independent instructions */ +/************************************************************************/ + +void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); + +/************************************************************************/ +/* The public entry points to output instructions */ +/************************************************************************/ + +public: +void emitIns(instruction ins); + +void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, regNumber tmpReg, int varx, int offs); +void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); + +void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); +void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); + +void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +void emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags) +{ + _ASSERTE(!"RISCV64: NYI"); +} + +void emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R( + instruction ins, emitAttr attr, 
regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + +void emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); + +void emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); + +void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +void emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); + +enum EmitCallType +{ + + // I have included here, but commented out, all the values used by the x86 emitter. + // However, RISCV64 has a much reduced instruction set, and so the RISCV64 emitter only + // supports a subset of the x86 variants. By leaving them commented out, it becomes + // a compile time error if code tries to use them (and hopefully see this comment + // and know why they are unavailable on RISCV64), while making it easier to stay + // in-sync with x86 and possibly add them back in if needed. + + EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method + // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method + // EC_FUNC_ADDR, // Direct call to an absolute address + + // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) + EC_INDIR_R, // Indirect call via register + // EC_INDIR_SR, // Indirect call via stack-reference (local var) + // EC_INDIR_C, // Indirect call via static class var + // EC_INDIR_ARD, // Indirect call via an addressing mode + + EC_COUNT +}; + +void emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di, + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); + +unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); + +unsigned get_curTotalCodeSize(); // bytes of code + +#endif // TARGET_RISCV64 diff --git a/src/coreclr/jit/error.h b/src/coreclr/jit/error.h index 3ce4df25eef963..f31732e28c9f56 100644 --- a/src/coreclr/jit/error.h +++ b/src/coreclr/jit/error.h @@ -174,6 +174,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) #define NYI_LOONGARCH64(msg) do { } while (0) +#define NYI_RISCV64(msg) do { } while (0) #elif defined(TARGET_X86) @@ -182,6 +183,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) #define NYI_LOONGARCH64(msg) do { } while (0) +#define NYI_RISCV64(msg) do { } while (0) #elif
defined(TARGET_ARM) @@ -190,6 +192,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg) #define NYI_ARM64(msg) do { } while (0) #define NYI_LOONGARCH64(msg) do { } while (0) +#define NYI_RISCV64(msg) do { } while (0) #elif defined(TARGET_ARM64) @@ -198,6 +201,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg) #define NYI_LOONGARCH64(msg) do { } while (0) +#define NYI_RISCV64(msg) do { } while (0) #elif defined(TARGET_LOONGARCH64) #define NYI_AMD64(msg) do { } while (0) @@ -205,10 +209,19 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) #define NYI_LOONGARCH64(msg) NYIRAW("NYI_LOONGARCH64: " msg) +#define NYI_RISCV64(msg) do { } while (0) + +#elif defined(TARGET_RISCV64) +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) +#define NYI_RISCV64(msg) NYIRAW("NYI_RISCV64: " msg) #else -#error "Unknown platform, not x86, ARM, LOONGARCH64 or AMD64?" +#error "Unknown platform, not x86, ARM, LOONGARCH64, AMD64, or RISCV64?" #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 16fe9b2f19cc05..6be60dafe37d58 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1322,7 +1322,7 @@ void CallArgABIInformation::SetByteSize(unsigned byteSize, unsigned byteAlignmen // is a HFA of doubles, since double and float registers overlap. void CallArgABIInformation::SetMultiRegNums() { -#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) +#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) if (NumRegs == 1) { return; @@ -1343,7 +1343,7 @@ void CallArgABIInformation::SetMultiRegNums() argReg = (regNumber)(argReg + regSize); SetRegNum(regIndex, argReg); } -#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) +#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) } //--------------------------------------------------------------- @@ -4311,6 +4311,27 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ addrModeCostSz += 4; } } +#elif defined(TARGET_RISCV64) + if (base) + { + addrModeCostEx += base->GetCostEx(); + addrModeCostSz += base->GetCostSz(); + } + + if (idx) + { + addrModeCostEx += idx->GetCostEx(); + addrModeCostSz += idx->GetCostSz(); + } + if (cns != 0) + { + if (!emitter::isValidSimm12(cns)) + { + // TODO-RISCV64: tune for RISCV64. + addrModeCostEx += 1; + addrModeCostSz += 4; + } + } #else #error "Unknown TARGET" #endif @@ -4755,6 +4776,18 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 4; goto COMMON_CNS; + case GT_CNS_LNG: + case GT_CNS_INT: + costEx = 1; + costSz = 4; + goto COMMON_CNS; +#elif defined(TARGET_RISCV64) + // TODO-RISCV64: tune the costs. + case GT_CNS_STR: + costEx = IND_COST_EX + 2; + costSz = 4; + goto COMMON_CNS; + case GT_CNS_LNG: case GT_CNS_INT: costEx = 1; @@ -4833,6 +4866,11 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // TODO-LoongArch64-CQ: tune the costs. 
costEx = 2; costSz = 8; +#elif defined(TARGET_RISCV64) + // TODO-RISCV64-CQ: tune the costs. + costEx = 2; + costSz = 8; + //_ASSERTE(!"TODO RISCV64 NYI"); #else #error "Unknown TARGET" #endif @@ -5036,6 +5074,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // TODO-LoongArch64-CQ: tune the costs. costEx = 1; costSz = 4; +#elif defined(TARGET_RISCV64) + // TODO-RISCV64: tune the costs. + costEx = 1; + costSz = 4; #else #error "Unknown TARGET" #endif @@ -8072,7 +8114,7 @@ bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (!comp->opts.MinOpts() && ((divisorValue >= 3) || !isSignedDivide)) { // All checks pass we can perform the division operation using a reciprocal multiply. @@ -17025,7 +17067,7 @@ const GenTreeLclVarCommon* GenTree::IsLocalAddrExpr() const // GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValuePreMorph(Compiler* compiler) { -#if FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) // TODO-LOONGARCH64-CQ: enable this. +#if FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // TODO-LOONGARCH64-CQ & TODO-RISCV64: enable this. GenTreeLclVarCommon* lcl = OperIsLocal() ? AsLclVarCommon() : nullptr; @@ -17034,7 +17076,7 @@ GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValuePreMorph(Compiler* co return lcl; } -#endif // FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) +#endif // FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) return nullptr; } @@ -17055,7 +17097,7 @@ GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValuePreMorph(Compiler* co // GenTreeLclVar* GenTree::IsImplicitByrefParameterValuePostMorph(Compiler* compiler, GenTree** addr) { -#if FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) // TODO-LOONGARCH64-CQ: enable this. +#if FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // TODO-LOONGARCH64-RISCV64-CQ: enable this. 
if (!OperIsIndir()) { @@ -17079,7 +17121,7 @@ GenTreeLclVar* GenTree::IsImplicitByrefParameterValuePostMorph(Compiler* compile } } -#endif // FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) +#endif // FEATURE_IMPLICIT_BYREFS && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) return nullptr; } @@ -24199,7 +24241,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, m_regType[i] = comp->getJitGCType(gcPtrs[i]); } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); uint32_t floatFieldFlags = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); @@ -24459,7 +24501,7 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3 } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) var_types regType = GetReturnRegType(idx); if (idx == 0) { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 243474c42eb92f..e24c290fbc685a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4468,7 +4468,7 @@ struct CallArgABIInformation , StructIntRegs(0) , StructFloatRegs(0) #endif -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) , StructFloatFieldType() #endif , ArgType(TYP_UNDEF) @@ -4508,7 +4508,7 @@ struct CallArgABIInformation unsigned StructFloatRegs; SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR StructDesc; #endif // UNIX_AMD64_ABI -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // For LoongArch64's ABI, the struct which has float field(s) and no more than two fields // may be passed by float register(s). // e.g `struct {int a; float b;}` passed by an integer register and a float register. @@ -5291,7 +5291,7 @@ struct GenTreeCall final : public GenTree bool HasMultiRegRetVal() const { #ifdef FEATURE_MULTIREG_RET -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); #else diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index dc327ff6fd4267..fc1fbd8703227a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4600,10 +4600,10 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) // In contrast, we can only use multi-reg calls directly if they have the exact same ABI. // Calling convention equality is a conservative approximation for that check. if (op->IsCall() && (op->AsCall()->GetUnmanagedCallConv() == info.compCallConv) -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO-Review: this seems unnecessary. Return ABI doesn't change under varargs. 
&& !op->AsCall()->IsVarargs() -#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) ) { return op; diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b2a2d46892aafa..7a8141e791e64b 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -6229,7 +6229,7 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) default: return false; } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO-LoongArch64: add some intrinsics. return false; #else @@ -7252,7 +7252,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni return true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -7282,7 +7282,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 return false; } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 91e018cbd8ac8b..4122836678be50 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -69,6 +69,10 @@ const char* CodeGen::genInsName(instruction ins) #define INST(id, nm, ldst, e1) nm, #include "instrs.h" +#elif defined(TARGET_RISCV64) + #define INST(id, nm, ldst, e1) nm, + #include "instrs.h" + #else #error "Unknown TARGET" #endif @@ -340,6 +344,8 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s #ifdef TARGET_LOONGARCH64 // inst_RV is not used for LoongArch64, so there is no need to define `emitIns_R`. 
NYI_LOONGARCH64("inst_RV-----unused on LOONGARCH64----"); +#elif defined(TARGET_RISCV64) + NYI_RISCV64("inst_RV-----unused on RISCV64----"); #else GetEmitter()->emitIns_R(ins, size, reg); #endif @@ -356,7 +362,7 @@ void CodeGen::inst_Mov(var_types dstType, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */) { -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) { if (dstType == TYP_FLOAT) @@ -378,6 +384,7 @@ void CodeGen::inst_Mov(var_types dstType, else { NYI_LOONGARCH64("CodeGen::inst_Mov dstType"); + NYI_RISCV64("CodeGen::inst_Mov dstType"); } } #endif @@ -470,7 +477,7 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); -#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else NYI("inst_RV_RV_RV"); @@ -546,7 +553,7 @@ void CodeGen::inst_RV_IV( assert(ins != INS_tst); assert(ins != INS_mov); GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); #else // !TARGET_ARM #ifdef TARGET_AMD64 @@ -604,7 +611,11 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe #if CPU_LOAD_STORE_ARCH assert(GetEmitter()->emitInsIsStore(ins)); #endif +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins, size, reg, REG_NA, varNum, 0); +#else GetEmitter()->emitIns_S_R(ins, size, reg, varNum, 0); +#endif } /***************************************************************************** @@ -918,10 +929,12 @@ void CodeGen::inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTr break; case OperandKind::Reg: +#if !defined(TARGET_RISCV64) if (emit->IsMovInstruction(ins)) { emit->emitIns_Mov(ins, size, op1Reg, op2Desc.GetReg(), /* canSkip */ true); } +#endif // !TARGET_RISCV64 else { emit->emitIns_R_R(ins, size, op1Reg, op2Desc.GetReg()); @@ -1072,7 +1085,11 @@ void CodeGen::inst_RV_RV_TT( void CodeGen::inst_ST_RV(instruction ins, TempDsc* tmp, unsigned ofs, regNumber reg, var_types type) { +#ifdef TARGET_RISCV64 + GetEmitter()->emitIns_S_R(ins, emitActualTypeSize(type), reg, REG_NA, tmp->tdTempNum(), ofs); +#else GetEmitter()->emitIns_S_R(ins, emitActualTypeSize(type), reg, tmp->tdTempNum(), ofs); +#endif } #ifdef TARGET_XARCH @@ -1236,6 +1253,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { NYI_LOONGARCH64("ins_Move_Extend"); + NYI_RISCV64("ins_Move_Extend"); instruction ins = INS_invalid; @@ -1450,6 +1468,19 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* { assert(!"unhandled floating type"); } +#elif defined(TARGET_RISCV64) + if (srcType == TYP_DOUBLE) + { + return INS_fld; + } + else if (srcType == TYP_FLOAT) + { + return INS_flw; + } + else + { + assert(!"unhandled floating type"); + } #else assert(!varTypeIsFloating(srcType)); #endif @@ -1511,6 +1542,29 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* { ins = INS_ld_d; // default ld_d. 
} +#elif defined(TARGET_RISCV64) + if (varTypeIsByte(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_lbu; + else + ins = INS_lb; + } + else if (varTypeIsShort(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_lhu; + else + ins = INS_lh; + } + else if (TYP_INT == srcType) + { + ins = INS_lw; + } + else + { + ins = INS_ld; // default ld. + } #else NYI("ins_Load"); #endif @@ -1571,8 +1625,17 @@ instruction CodeGen::ins_Copy(var_types dstType) { return INS_mov; } +#elif defined(TARGET_RISCV64) + if (varTypeIsFloating(dstType)) + { + return dstType == TYP_FLOAT ? INS_fsgnj_s : INS_fsgnj_d; + } + else + { + return INS_mov; + } #else // TARGET_* -#error "Unknown TARGET_" +#error "Unknown TARGET" #endif } @@ -1635,6 +1698,9 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) assert(genIsValidFloatReg(srcReg)); return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? INS_movfr2gr_s : INS_movfr2gr_d; } +#elif defined(TARGET_RISCV64) + NYI_RISCV64("TODO RISCV64"); + return INS_invalid; #else // TARGET* #error "Unknown TARGET" #endif @@ -1710,6 +1776,19 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false return aligned ? INS_fstx_s : INS_fst_s; } } +#elif defined(TARGET_RISCV64) + assert(!varTypeIsSIMD(dstType)); + if (varTypeIsFloating(dstType)) + { + if (dstType == TYP_DOUBLE) + { + return INS_fsd; + } + else if (dstType == TYP_FLOAT) + { + return INS_fsw; + } + } #else assert(!varTypeIsSIMD(dstType)); assert(!varTypeIsFloating(dstType)); @@ -1733,6 +1812,15 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = aligned ? INS_stx_w : INS_st_w; else ins = aligned ? INS_stx_d : INS_st_d; +#elif defined(TARGET_RISCV64) + if (varTypeIsByte(dstType)) + ins = INS_sb; + else if (varTypeIsShort(dstType)) + ins = INS_sh; + else if (TYP_INT == dstType) + ins = INS_sw; + else + ins = INS_sd; #else NYI("ins_Store"); #endif @@ -2042,6 +2130,8 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla GetEmitter()->emitIns_Mov(INS_mov, size, reg, REG_ZR, /* canSkip */ true); #elif defined(TARGET_LOONGARCH64) GetEmitter()->emitIns_R_R_I(INS_ori, size, reg, REG_R0, 0); +#elif defined(TARGET_RISCV64) + GetEmitter()->emitIns_R_R_I(INS_addi, size, reg, REG_R0, 0); #else #error "Unknown TARGET" #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 180ad19ad3a96e..970068fc7ae44f 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -55,6 +55,12 @@ enum instruction : unsigned #define INST(id, nm, ldst, e1) INS_##id, #include "instrs.h" + INS_lea, // Not a real instruction. It is used for load the address of stack locals +#elif defined(TARGET_RISCV64) + // TODO RISCV64 + #define INST(id, nm, ldst, e1) INS_##id, + #include "instrs.h" + INS_lea, // Not a real instruction. It is used for load the address of stack locals #else #error Unsupported target architecture @@ -158,7 +164,7 @@ enum insFlags : uint64_t INS_FLAGS_DONT_CARE = 0x00ULL, }; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO-Cleanup: Move 'insFlags' under TARGET_ARM enum insFlags: unsigned { @@ -337,6 +343,26 @@ enum insBarrier : unsigned INS_BARRIER_REL = INS_BARRIER_FULL,//18, INS_BARRIER_RMB = INS_BARRIER_FULL,//19, }; +#elif defined(TARGET_RISCV64) +enum insOpts : unsigned +{ + INS_OPTS_NONE, + + INS_OPTS_RC, // see ::emitIns_R_C(). 
+ INS_OPTS_RL, // see ::emitIns_R_L(). + INS_OPTS_JALR, // see ::emitIns_J_R(). + INS_OPTS_J, // see ::emitIns_J(). + INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). + INS_OPTS_I, // see ::emitIns_I_la(). + INS_OPTS_C, // see ::emitIns_Call(). + INS_OPTS_RELOC, // see ::emitIns_R_AI(). +}; + +enum insBarrier : unsigned +{ + INS_BARRIER_FULL = 0x33, +}; + #endif #if defined(TARGET_XARCH) diff --git a/src/coreclr/jit/instrs.h b/src/coreclr/jit/instrs.h index aa16547f44be73..1bbbd3f2367e2b 100644 --- a/src/coreclr/jit/instrs.h +++ b/src/coreclr/jit/instrs.h @@ -9,6 +9,8 @@ #include "instrsarm64.h" #elif defined(TARGET_LOONGARCH64) #include "instrsloongarch64.h" +#elif defined(TARGET_RISCV64) +#include "instrsriscv64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/instrsriscv64.h b/src/coreclr/jit/instrsriscv64.h new file mode 100644 index 00000000000000..e08522a99d44bc --- /dev/null +++ b/src/coreclr/jit/instrsriscv64.h @@ -0,0 +1,248 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/***************************************************************************** + * RISCV64 instructions for JIT compiler + * + * id -- the enum name for the instruction + * nm -- textual name (for assembly display) + * ld/st/cmp -- load/store/compare instruction + * encode -- encoding 1 + * +******************************************************************************/ + +#if !defined(TARGET_RISCV64) +#error Unexpected target type +#endif + +#ifndef INST +#error INST must be defined before including this file. +#endif + +/*****************************************************************************/ +/* The following is RISCV64-specific */ +/*****************************************************************************/ + +// If you're adding a new instruction: +// You need not only to fill in one of these macros describing the instruction, but also: +// * If the instruction writes to more than one destination register, update the function +// emitInsMayWriteMultipleRegs in emitriscv64.cpp.
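+//
+// Note on the `encode` column: it is assumed to be the 32-bit base encoding of the instruction
+// (the opcode and funct bits) with all register and immediate fields left as zero, so that the
+// emitter can OR the operand fields into it when the final instruction word is produced.
+// For example, `addi` is 0x00000013; with rd = rs1 = imm = 0 this is also the canonical `nop`,
+// and `mov` is a pseudo-instruction encoded as `addi rd, rs1, 0`.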
+ +// clang-format off + +// RV32I & RV64I +INST(invalid, "INVALID", 0, 0x00000000) +INST(nop, "nop", 0, 0x00000013) + +//// R_R +INST(mov, "mov", 0, 0x00000013) + +////R_I +INST(lui, "lui", 0, 0x00000037) +INST(auipc, "auipc", 0, 0x00000017) + +//// R_R_I +INST(addi, "addi", 0, 0x00000013) +INST(slti, "slti", 0, 0x00002013) +INST(sltiu, "sltiu", 0, 0x00003013) +INST(xori, "xori", 0, 0x00004013) +INST(ori, "ori", 0, 0x00006013) +INST(andi, "andi", 0, 0x00007013) +INST(slli, "slli", 0, 0x00001013) +INST(srli, "srli", 0, 0x00005013) +INST(srai, "srai", 0, 0x40005013) + +//// R_R_R +INST(add, "add", 0, 0x00000033) +INST(sub, "sub", 0, 0x40000033) +INST(sll, "sll", 0, 0x00001033) +INST(slt, "slt", 0, 0x00002033) +INST(sltu, "sltu", 0, 0x00003033) +INST(xor, "xor", 0, 0x00004033) +INST(srl, "srl", 0, 0x00005033) +INST(sra, "sra", 0, 0x40005033) +INST(or, "or", 0, 0x00006033) +INST(and, "and", 0, 0x00007033) + +INST(fence, "fence", 0, 0x0000000f) +INST(fence_i, "fence.i", 0, 0x0000100f) + +//// R_I_R +INST(csrrw, "csrrw", 0, 0x00001073) +INST(csrrs, "csrrs", 0, 0x00002073) +INST(csrrc, "csrrc", 0, 0x00003073) + +//// R_I_I +INST(csrrwi, "csrrwi", 0, 0x00005073) +INST(csrrsi, "csrrsi", 0, 0x00006073) +INST(csrrci, "csrrci", 0, 0x00007073) + +INST(ecall, "ecall", 0, 0x00000073) +INST(ebreak, "ebreak", 0, 0x00100073) +INST(uret, "uret", 0, 0x00200073) +INST(sret, "sret", 0, 0x10200073) +INST(mret, "mret", 0, 0x30200073) +INST(wfi, "wfi", 0, 0x10500073) +INST(sfence_vma, "sfence.vma", 0, 0x12000073) + +//// R_R_I +INST(lb, "lb", LD, 0x00000003) +INST(lh, "lh", LD, 0x00001003) +INST(lw, "lw", LD, 0x00002003) +INST(lbu, "lbu", LD, 0x00004003) +INST(lhu, "lhu", LD, 0x00005003) + +INST(sb, "sb", ST, 0x00000023) +INST(sh, "sh", ST, 0x00001023) +INST(sw, "sw", ST, 0x00002023) + +//// R_I +INST(jal, "jal", 0, 0x0000006f) +INST(j, "j", 0, 0x0000006f) +INST(beqz, "beqz", 0, 0x00000063) +INST(bnez, "bnez", 0, 0x00001063) + +//// R_R_I +INST(jalr, "jalr", 0, 0x00000067) +INST(beq, "beq", 0, 0x00000063) +INST(bne, "bne", 0, 0x00001063) +INST(blt, "blt", 0, 0x00004063) +INST(bge, "bge", 0, 0x00005063) +INST(bltu, "bltu", 0, 0x00006063) +INST(bgeu, "bgeu", 0, 0x00007063) + +// RV64I +//// R_R_I +INST(addiw, "addiw", 0, 0x0000001b) +INST(slliw, "slliw", 0, 0x0000101b) +INST(srliw, "srliw", 0, 0x0000501b) +INST(sraiw, "sraiw", 0, 0x4000501b) + +//// R_R_R +INST(addw, "addw", 0, 0x0000003b) +INST(subw, "subw", 0, 0x4000003b) +INST(sllw, "sllw", 0, 0x0000103b) +INST(srlw, "srlw", 0, 0x0000503b) +INST(sraw, "sraw", 0, 0x4000503b) + +//// R_R_I +INST(lwu, "lwu", LD, 0x00006003) +INST(ld, "ld", LD, 0x00003003) +INST(sd, "sd", ST, 0x00003023) + + +// RV32M & RV64M +//// R_R_R +INST(mul, "mul", 0, 0x02000033) +INST(mulh, "mulh", 0, 0x02001033) +INST(mulhsu, "mulhsu", 0, 0x02002033) +INST(mulhu, "mulhu", 0, 0x02003033) +INST(div, "div", 0, 0x02004033) +INST(divu, "divu", 0, 0x02005033) +INST(rem, "rem", 0, 0x02006033) +INST(remu, "remu", 0, 0x02007033) + + +// RV64M +//// R_R_R +INST(mulw, "mulw", 0, 0x0200003b) +INST(divw, "divw", 0, 0x0200403b) +INST(divuw, "divuw", 0, 0x0200503b) +INST(remw, "remw", 0, 0x0200603b) +INST(remuw, "remuw", 0, 0x0200703b) + +// RV32F & RV64D +//// R_R_R_R +INST(fmadd_s, "fmadd.s", 0, 0x00000043) +INST(fmsub_s, "fmsub.s", 0, 0x00000047) +INST(fnmsub_s, "fnmsub.s", 0, 0x0000004b) +INST(fnmadd_s, "fnmadd.s", 0, 0x0000004f) + +//// R_R_R +INST(fadd_s, "fadd.s", 0, 0x00000053) +INST(fsub_s, "fsub.s", 0, 0x08000053) +INST(fmul_s, "fmul.s", 0, 0x10000053) +INST(fdiv_s, "fdiv.s", 0, 0x18000053) 
+INST(fsqrt_s, "fsqrt.s", 0, 0x58000053) +INST(fsgnj_s, "fsgnj.s", 0, 0x20000053) +INST(fsgnjn_s, "fsgnjn.s", 0, 0x20001053) +INST(fsgnjx_s, "fsgnjx.s", 0, 0x20002053) +INST(fmin_s, "fmin.s", 0, 0x28000053) +INST(fmax_s, "fmax.s", 0, 0x28001053) + +//// R_R +INST(fcvt_w_s, "fcvt.w.s", 0, 0xc0000053) +INST(fcvt_wu_s, "fcvt.wu.s", 0, 0xc0100053) +INST(fmv_x_w, "fmv.x.w", 0, 0xe0000053) + +//// R_R_R +INST(feq_s, "feq.s", 0, 0xa0002053) +INST(flt_s, "flt.s", 0, 0xa0001053) +INST(fle_s, "fle.s", 0, 0xa0000053) + +//// R_R +INST(fclass_s, "fclass.s", 0, 0xe0001053) +INST(fcvt_s_w, "fcvt.s.w", 0, 0xd0000053) +INST(fcvt_s_wu, "fcvt.s.wu", 0, 0xd0100053) +INST(fmv_w_x, "fmv.w.x", 0, 0xf0000053) + +//// R_R_R_R +INST(fmadd_d, "fmadd.d", 0, 0x02000043) +INST(fmsub_d, "fmsub.d", 0, 0x02000047) +INST(fnmsub_d, "fnmsub.d", 0, 0x0200004b) +INST(fnmadd_d, "fnmadd.d", 0, 0x0200004f) + +//// R_R_R +INST(fadd_d, "fadd.d", 0, 0x02000053) +INST(fsub_d, "fsub.d", 0, 0x0a000053) +INST(fmul_d, "fmul.d", 0, 0x12000053) +INST(fdiv_d, "fdiv.d", 0, 0x1a000053) +INST(fsqrt_d, "fsqrt.d", 0, 0x5a000053) +INST(fsgnj_d, "fsgnj.d", 0, 0x22000053) +INST(fsgnjn_d, "fsgnjn.d", 0, 0x22001053) +INST(fsgnjx_d, "fsgnjx.d", 0, 0x22002053) +INST(fmin_d, "fmin.d", 0, 0x2a000053) +INST(fmax_d, "fmax.d", 0, 0x2a001053) + +//// R_R +INST(fcvt_s_d, "fcvt.s.d", 0, 0x40101053) +INST(fcvt_d_s, "fcvt.d.s", 0, 0x42001053) + +//// R_R_R +INST(feq_d, "feq.d", 0, 0xa2002053) +INST(flt_d, "flt.d", 0, 0xa2001053) +INST(fle_d, "fle.d", 0, 0xa2000053) + +//// R_R +INST(fclass_d, "fclass.d", 0, 0xe2001053) +INST(fcvt_w_d, "fcvt.w.d", 0, 0xc2001053) +INST(fcvt_wu_d, "fcvt.wu.d", 0, 0xc2101053) +INST(fcvt_d_w, "fcvt.d.w", 0, 0xd2001053) +INST(fcvt_d_wu, "fcvt.d.wu", 0, 0xd2101053) + +//// R_R_I +INST(flw, "flw", LD, 0x00002007) +INST(fsw, "fsw", ST, 0x00002027) +INST(fld, "fld", LD, 0x00003007) +INST(fsd, "fsd", ST, 0x00003027) + +// RV64F +//// R_R +INST(fcvt_l_s, "fcvt.l.s", 0, 0xc0200053) +INST(fcvt_lu_s, "fcvt.lu.s", 0, 0xc0300053) +INST(fcvt_s_l, "fcvt.s.l", 0, 0xd0200053) +INST(fcvt_s_lu, "fcvt.s.lu", 0, 0xd0300053) + +// RV64D +INST(fcvt_l_d, "fcvt.l.d", 0, 0xc2200053) +INST(fcvt_lu_d, "fcvt.lu.d", 0, 0xc2300053) +INST(fmv_x_d, "fmv.x.d", 0, 0xe2000053) +INST(fcvt_d_l, "fcvt.d.l", 0, 0xd2200053) +INST(fcvt_d_lu, "fcvt.d.lu", 0, 0xd2300053) +INST(fmv_d_x, "fmv.d.x", 0, 0xf2000053) + + +// clang-format on +/*****************************************************************************/ +#undef INST +/*****************************************************************************/ diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index eef88bee6c977b..e1aabfba6d10b3 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -45,6 +45,9 @@ #if defined(HOST_LOONGARCH64) #error Cannot define both HOST_X86 and HOST_LOONGARCH64 #endif +#if defined(HOST_RISCV64) +#error Cannot define both HOST_X86 and HOST_RISCV64 +#endif #elif defined(HOST_AMD64) #if defined(HOST_X86) #error Cannot define both HOST_AMD64 and HOST_X86 @@ -58,6 +61,9 @@ #if defined(HOST_LOONGARCH64) #error Cannot define both HOST_AMD64 and HOST_LOONGARCH64 #endif +#if defined(HOST_RISCV64) +#error Cannot define both HOST_AMD64 and HOST_RISCV64 +#endif #elif defined(HOST_ARM) #if defined(HOST_X86) #error Cannot define both HOST_ARM and HOST_X86 @@ -71,6 +77,9 @@ #if defined(HOST_LOONGARCH64) #error Cannot define both HOST_ARM and HOST_LOONGARCH64 #endif +#if defined(HOST_RISCV64) +#error Cannot define both HOST_ARM and HOST_RISCV64 +#endif #elif defined(HOST_ARM64) #if 
defined(HOST_X86) #error Cannot define both HOST_ARM64 and HOST_X86 @@ -84,6 +93,9 @@ #if defined(HOST_LOONGARCH64) #error Cannot define both HOST_ARM64 and HOST_LOONGARCH64 #endif +#if defined(HOST_RISCV64) +#error Cannot define both HOST_ARM64 and HOST_RISCV64 +#endif #elif defined(HOST_LOONGARCH64) #if defined(HOST_X86) #error Cannot define both HOST_LOONGARCH64 and HOST_X86 @@ -97,6 +109,25 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_LOONGARCH64 and HOST_ARM64 #endif +#if defined(HOST_RISCV64) +#error Cannot define both HOST_LOONGARCH64 and HOST_RISCV64 +#endif +#elif defined(HOST_RISCV64) +#if defined(HOST_X86) +#error Cannot define both HOST_RISCV64 and HOST_X86 +#endif +#if defined(HOST_AMD64) +#error Cannot define both HOST_RISCV64 and HOST_AMD64 +#endif +#if defined(HOST_ARM) +#error Cannot define both HOST_RISCV64 and HOST_ARM +#endif +#if defined(HOST_ARM64) +#error Cannot define both HOST_RISCV64 and HOST_ARM64 +#endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_RISCV64 and HOST_LOONGARCH64 +#endif #else #error Unsupported or unset host architecture #endif @@ -114,6 +145,9 @@ #if defined(TARGET_LOONGARCH64) #error Cannot define both TARGET_X86 and TARGET_LOONGARCH64 #endif +#if defined(TARGET_RISCV64) +#error Cannot define both TARGET_X86 and TARGET_RISCV64 +#endif #elif defined(TARGET_AMD64) #if defined(TARGET_X86) #error Cannot define both TARGET_AMD64 and TARGET_X86 @@ -127,6 +161,9 @@ #if defined(TARGET_LOONGARCH64) #error Cannot define both TARGET_AMD64 and TARGET_LOONGARCH64 #endif +#if defined(TARGET_RISCV64) +#error Cannot define both TARGET_AMD64 and TARGET_RISCV64 +#endif #elif defined(TARGET_ARM) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM and TARGET_X86 @@ -140,6 +177,9 @@ #if defined(TARGET_LOONGARCH64) #error Cannot define both TARGET_ARM and TARGET_LOONGARCH64 #endif +#if defined(TARGET_RISCV64) +#error Cannot define both TARGET_ARM and TARGET_RISCV64 +#endif #elif defined(TARGET_ARM64) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM64 and TARGET_X86 @@ -153,6 +193,9 @@ #if defined(TARGET_LOONGARCH64) #error Cannot define both TARGET_ARM64 and TARGET_LOONGARCH64 #endif +#if defined(TARGET_RISCV64) +#error Cannot define both TARGET_ARM64 and TARGET_RISCV64 +#endif #elif defined(TARGET_LOONGARCH64) #if defined(TARGET_X86) #error Cannot define both TARGET_LOONGARCH64 and TARGET_X86 @@ -166,6 +209,26 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM64 #endif +#if defined(TARGET_RISCV64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_RISCV64 +#endif +#elif defined(TARGET_RISCV64) +#if defined(TARGET_X86) +#error Cannot define both TARGET_RISCV64 and TARGET_X86 +#endif +#if defined(TARGET_AMD64) +#error Cannot define both TARGET_RISCV64 and TARGET_AMD64 +#endif +#if defined(TARGET_ARM) +#error Cannot define both TARGET_RISCV64 and TARGET_ARM +#endif +#if defined(TARGET_ARM64) +#error Cannot define both TARGET_RISCV64 and TARGET_ARM64 +#endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_RISCV64 and TARGET_LOONGARCH64 +#endif + #else #error Unsupported or unset target architecture #endif @@ -215,6 +278,8 @@ #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 #elif defined(TARGET_LOONGARCH64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0x6264 +#elif defined(TARGET_RISCV64) +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_RISCV64 // 0x5641 #else #error Unsupported or unset target architecture 
#endif @@ -269,7 +334,7 @@ typedef ptrdiff_t ssize_t; #define UNIX_LOONGARCH64_ONLY(x) #endif // TARGET_LOONGARCH64 -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define FEATURE_PUT_STRUCT_ARG_STK 1 #endif @@ -281,7 +346,7 @@ typedef ptrdiff_t ssize_t; #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define MULTIREG_HAS_SECOND_GC_RET 1 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x #define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x @@ -295,7 +360,7 @@ typedef ptrdiff_t ssize_t; // the official Arm64 ABI. // Case: splitting 16 byte struct between x7 and stack // LoongArch64's ABI supports FEATURE_ARG_SPLIT which splitting 16 byte struct between a7 and stack. -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 450e3c80ce1520..69957a175ae9e7 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -888,7 +888,7 @@ unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTr assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX); assert(ehGetDsc(finallyIndex)->HasFinallyHandler()); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion); #else *inTryRegion = true; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 7cd22c1d9ed6da..e505af74248504 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -624,7 +624,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un lvaSetClass(varDscInfo->varNum, clsHnd); } - // For ARM, ARM64, LOONGARCH64, and AMD64 varargs, all arguments go in integer registers + // For ARM, ARM64, LOONGARCH64, RISCV64 and AMD64 varargs, all arguments go in integer registers var_types argType = mangleVarArgsType(varDsc->TypeGet()); var_types origArgType = argType; @@ -839,7 +839,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); } else -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; var_types argRegTypeInStruct1 = TYP_UNKNOWN; var_types argRegTypeInStruct2 = TYP_UNKNOWN; @@ -914,11 +914,11 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); -#if defined(TARGET_LOONGARCH64) - // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // On LoongArch64 and 
TARGET_RISCV64 , if there aren't any remaining floating-point registers to pass the argument, // integer registers (if any) are used instead. if (!canPassArgInRegisters && varTypeIsFloating(argType)) { @@ -962,13 +962,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (argRegTypeInStruct1 != TYP_UNKNOWN) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1); } else -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); } @@ -1016,7 +1016,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)); } } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (argType == TYP_STRUCT) { if (argRegTypeInStruct1 != TYP_UNKNOWN) @@ -1176,7 +1176,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDscInfo->setAnyFloatStackArgs(); } -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // If we needed to use the stack in order to pass this argument then // record the fact that we have used up any remaining registers of this 'type' @@ -1441,11 +1441,11 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #if FEATURE_IMPLICIT_BYREFS varDsc->lvIsImplicitByRef = 0; #endif // FEATURE_IMPLICIT_BYREFS -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) varDsc->lvIs4Field1 = 0; varDsc->lvIs4Field2 = 0; varDsc->lvIsSplit = 0; -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 // Set the lvType (before this point it is TYP_UNDEF). @@ -2005,7 +2005,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) { canPromote = false; } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) else { for (unsigned i = 0; canPromote && (i < fieldCnt); i++) @@ -2121,7 +2121,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) JITDUMP("Not promoting multi-reg returned struct local V%02u with holes.\n", lclNum); shouldPromote = false; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO-PERF - Only do this when the LclVar is used in an argument context // TODO-ARM64 - HFA support should also eliminate the need for this. // TODO-ARM32 - HFA support should also eliminate the need for this. 
@@ -2138,7 +2138,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) else if ((structPromotionInfo.fieldCnt == 2) && (varTypeIsFloating(structPromotionInfo.fields[0].fldType) || varTypeIsFloating(structPromotionInfo.fields[1].fldType))) { @@ -2149,7 +2149,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) shouldPromote = false; } #endif -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 || TARGET_RISCV64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE @@ -2890,7 +2890,7 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg) return true; } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (howToPassStruct == SPK_ByValue) { assert(type == TYP_STRUCT); @@ -3952,20 +3952,20 @@ size_t LclVarDsc::lvArgStackSize() const #if defined(WINDOWS_AMD64_ABI) // Structs are either passed by reference or can be passed by value using one pointer stackSize = TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // lvSize performs a roundup. stackSize = this->lvSize(); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) { // If the size is greater than 16 bytes then it will // be passed by reference. stackSize = TARGET_POINTER_SIZE; } -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 !TARGET_RISCV64 NYI("Unsupported target."); unreached(); @@ -5486,14 +5486,14 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) else { // FP is used. JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 if (opts.IsOSR()) { @@ -5602,7 +5602,7 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // We normally add alignment below the locals between them and the outgoing // arg space area. When we store fp/lr(ra) at the bottom, however, this will // be below the alignment. 
So we should not apply the alignment adjustment to @@ -5614,7 +5614,7 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 } #ifdef TARGET_ARM @@ -6011,7 +6011,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, * when updating the current offset on the stack */ CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) #if DEBUG // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize // Also investigate why we are incrementing argOffs for X86 as this seems incorrect @@ -6120,7 +6120,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, argOffs += argSize; } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (varDsc->lvIsSplit) { @@ -6400,13 +6400,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Subtract off FP and RA. assert(compCalleeRegsPushed >= 2); stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; -#else // !TARGET_LOONGARCH64 +#else // !TARGET_LOONGARCH64 !TARGET_RISCV64 #ifdef TARGET_ARM // On ARM32 LR is part of the pushed registers and is always stored at the // top. @@ -6417,7 +6417,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; -#endif // !TARGET_LOONGARCH64 +#endif // !TARGET_LOONGARCH64 !TARGET_RISCV64 // (2) Account for the remainder of the frame // @@ -6529,7 +6529,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } } -#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) +#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym we allocate it early since funclets @@ -6538,7 +6538,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } -#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64) +#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64) if (mustDoubleAlign) { @@ -7011,7 +7011,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // If we have an incoming register argument that has a promoted field then we // need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // @@ -7024,7 +7024,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() fieldVarDsc->SetStackOffset(varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset); } } -#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } } @@ -7129,10 +7129,10 @@ void 
Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) assert(isFramePointerUsed()); // Note that currently we always have a frame pointer stkOffs -= 2 * REGSIZE_BYTES; -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) @@ -7337,7 +7337,7 @@ void Compiler::lvaAlignFrame() lvaIncrementFrameSize(REGSIZE_BYTES); } -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The stack on ARM64/LoongArch64 must be 16 byte aligned. @@ -7999,7 +7999,7 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) compCalleeRegsPushed = CNT_CALLEE_SAVED; -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (compFloatingPointUsed) compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; @@ -8035,7 +8035,7 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) lvaAssignFrameOffsets(curState); unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ; -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (compFloatingPointUsed) { calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; @@ -8361,13 +8361,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* // Calculate padding unsigned padding = pComp->lvaStressLclFldPadding(lclNum); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // We need to support alignment requirements to access memory. 
unsigned alignment = 1; pComp->codeGen->InferOpSizeAlign(lcl, &alignment); alignment = roundUp(alignment, TARGET_POINTER_SIZE); padding = roundUp(padding, alignment); -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 // Change the variable to a TYP_BLK if (varType != TYP_BLK) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c5a895a4ebccea..3b53e9da433e68 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -418,7 +418,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCast(node); break; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_BOUNDS_CHECK: ContainCheckBoundsChk(node->AsBoundsChk()); break; @@ -454,7 +454,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_LSH: case GT_RSH: case GT_RSZ: -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) LowerShift(node->AsOp()); #else ContainCheckShiftRotate(node->AsOp()); @@ -527,7 +527,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); break; @@ -1511,7 +1511,7 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late) #endif // !defined(TARGET_64BIT) { -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (call->IsVarargs() || comp->opts.compUseSoftFP) { // For vararg call or on armel, reg args should be all integer. @@ -1522,7 +1522,7 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late) type = newNode->TypeGet(); } } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 GenTree* putArg = NewPutArg(call, arg, callArg, type); @@ -1543,7 +1543,7 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late) } } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ // LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform. // @@ -3249,14 +3249,14 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) GenTree* relopOp1 = relop->gtGetOp1(); GenTree* relopOp2 = relop->gtGetOp2(); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if ((relop->gtNext == jtrue) && relopOp2->IsCnsIntOrI()) { bool useJCMP = false; GenTreeFlags flags = GTF_EMPTY; -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (relop->OperIs(GT_EQ, GT_NE)) { // Codegen will use beq or bne. 
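+            // RISC-V, like LoongArch64, has no condition-flags register: beq/bne compare two
+            // registers and branch in one instruction, so the compare folds into the branch.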
@@ -3293,7 +3293,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 assert(relop->OperIsCompare()); assert(relop->gtNext == jtrue); @@ -4123,7 +4123,7 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) if (regType != TYP_UNDEF) { -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (varTypeIsFloating(call->TypeGet())) { regType = call->TypeGet(); @@ -5827,7 +5827,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (!comp->opts.MinOpts() && (divisorValue >= 3)) { size_t magic; @@ -6115,7 +6115,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) return nullptr; } -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) ssize_t magic; int shift; @@ -6218,6 +6218,9 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) #elif defined(TARGET_ARM) // Currently there's no GT_MULHI for ARM32 return nullptr; +#elif defined(TARGET_RISCV64) + NYI_RISCV64("TODO RISCV64"); + return nullptr; #else #error Unsupported or unset target architecture #endif @@ -7439,7 +7442,7 @@ void Lowering::TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, Bas ind->ChangeType(comp->gtTypeForNullCheck(ind)); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool useNullCheck = true; #elif defined(TARGET_ARM) bool useNullCheck = false; @@ -7622,7 +7625,7 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store) #if defined(TARGET_XARCH) || defined(TARGET_ARM) bool shouldSwitchToInteger = true; -#else // TARGET_ARM64 || TARGET_LOONGARCH64 +#else // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 bool shouldSwitchToInteger = FloatingPointUtils::isPositiveZero(dblCns); #endif diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index dc462e8220a68c..88ed6255e2b339 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -167,7 +167,7 @@ class Lowering final : public Phase void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type); void LowerArg(GenTreeCall* call, CallArg* callArg, bool late); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) GenTree* LowerFloatArg(GenTree** pArg, CallArg* callArg); GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); #endif diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp new file mode 100644 index 00000000000000..0d62bbcaf311ac --- /dev/null +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -0,0 +1,823 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Lowering for RISCV64 common code XX +XX XX +XX This encapsulates common logic for lowering trees for the RISCV64 XX +XX architectures. For a more detailed view of what is lowering, please XX +XX take a look at Lower.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_RISCV64 // This file is ONLY used for RISCV64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +#endif + +//------------------------------------------------------------------------ +// IsCallTargetInRange: Can a call target address be encoded in-place? +// +// Return Value: +// True if the addr fits into the range. +// +bool Lowering::IsCallTargetInRange(void* addr) +{ + // TODO-RISCV64: using B/BL for optimization. + return false; +} + +//------------------------------------------------------------------------ +// IsContainableImmed: Is an immediate encodable in-place? +// +// Return Value: +// True if the immediate can be folded into an instruction, +// for example small enough and non-relocatable. +// +bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const +{ + if (!varTypeIsFloating(parentNode->TypeGet())) + { + // Make sure we have an actual immediate + if (!childNode->IsCnsIntOrI()) + return false; + if (childNode->AsIntCon()->ImmedValNeedsReloc(comp)) + return false; + + // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. + target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; + + switch (parentNode->OperGet()) + { + case GT_CMPXCHG: + case GT_LOCKADD: + case GT_XADD: + NYI_RISCV64("GT_CMPXCHG,GT_LOCKADD,GT_XADD"); + break; + + case GT_ADD: + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_BOUNDS_CHECK: + return emitter::isValidSimm12(immVal); + case GT_AND: + case GT_OR: + case GT_XOR: + return emitter::isValidUimm11(immVal); + case GT_JCMP: + assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); + return true; + + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + if (immVal == 0) + return true; + break; + + default: + break; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. +// +// Performs contaiment checks. +// +// TODO-RISCV64-CQ: recognize GT_MULs that can be turned into MUL_LONGs, +// as those are cheaper. +// +// Arguments: +// mul - The node to lower +// +// Return Value: +// The next node to lower. +// +GenTree* Lowering::LowerMul(GenTreeOp* mul) +{ + assert(mul->OperIsMul()); + + ContainCheckMul(mul); + + return mul->gtNext; +} + +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. 
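(Aside, not part of the change: the isValidSimm12 / isValidUimm11 checks used in IsContainableImmed above correspond to the 12-bit signed immediate field of RISC-V I-type instructions, i.e. the range [-2048, 2047]; the unsigned-11-bit check is presumably its non-negative half. A self-contained sketch of those range tests:)

#include <cassert>
#include <cstdint>

// Signed 12-bit immediate range of RISC-V I-type instructions: [-2048, 2047].
bool IsSimm12(int64_t v)
{
    return (v >= -2048) && (v <= 2047);
}

// Non-negative half of that range, matching the stricter check used for the
// logical ops above (assumption: isValidUimm11 accepts 0..2047).
bool IsUimm11(int64_t v)
{
    return (v >= 0) && (v <= 2047);
}

int main()
{
    assert(IsSimm12(2047) && !IsSimm12(2048));
    assert(IsSimm12(-2048) && !IsSimm12(-2049));
    assert(IsUimm11(2047) && !IsUimm11(-1));
    return 0;
}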
+// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + + ContainCheckBinary(binOp); + + return binOp->gtNext; +} + +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + if (storeLoc->OperIs(GT_STORE_LCL_FLD)) + { + // We should only encounter this for lclVars that are lvDoNotEnregister. + verifyLclFldDoNotEnregister(storeLoc->GetLclNum()); + } + ContainCheckStoreLoc(storeLoc); +} + +//------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. +// +void Lowering::LowerStoreIndir(GenTreeStoreInd* node) +{ + ContainCheckStoreIndir(node); +} + +//------------------------------------------------------------------------ +// LowerBlockStore: Set block store type +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. +// +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + blkNode->SetOper(GT_STORE_BLK); + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // The fill value of an initblk is interpreted to hold a + // value of (unsigned int8) however a constant of any size + // may practically reside on the evaluation stack. So extract + // the lower byte out of the initVal constant and replicate + // it to a larger constant whose size is sufficient to support + // the largest width store of the desired inline expansion. + + ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; + if (fill == 0) + { + src->SetContained(); + } + else if (size >= REGSIZE_BYTES) + { + fill *= 0x0101010101010101LL; + src->gtType = TYP_LONG; + } + else + { + fill *= 0x01010101; + } + src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } + else + { + assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD)); + src->SetContained(); + + if (src->OperIs(GT_IND)) + { + // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained. 
+ // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the + // address, not knowing that GT_IND is part of a block op that has containment restrictions. + src->AsIndir()->Addr()->ClearContained(); + } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register. + const unsigned srcLclNum = src->AsLclVar()->GetLclNum(); + comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp)); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + if (!blkNode->AsObj()->GetLayout()->HasGCPtr()) + { + blkNode->SetOper(GT_STORE_BLK); + } + else if (dstAddr->OperIsLocalAddr() && (size <= CPBLK_UNROLL_LIMIT)) + { + // If the size is small enough to unroll then we need to mark the block as non-interruptible + // to actually allow unrolling. The generated code does not report GC references loaded in the + // temporary register(s) used for copying. + blkNode->SetOper(GT_STORE_BLK); + blkNode->gtBlkOpGcUnsafe = true; + } + } + + // CopyObj or CopyBlk + if (blkNode->OperIs(GT_STORE_OBJ)) + { + assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////// + else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + if (src->OperIs(GT_IND)) + { + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr(), src->AsIndir()); + } + + ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); + } + else + { + assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } +} + +//------------------------------------------------------------------------ +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. +// +// Arguments: +// blkNode - the block store node +// size - the block size +// addr - the address node to try to contain +// addrParent - the parent of addr, in case this is checking containment of the source address. +// +void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr, GenTree* addrParent) +{ + assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); + assert(size < INT32_MAX); + + if (addr->OperIsLocalAddr()) + { + addr->SetContained(); + return; + } + + if (!addr->OperIs(GT_ADD) || addr->gtOverflow() || !addr->AsOp()->gtGetOp2()->OperIs(GT_CNS_INT)) + { + return; + } + + GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon(); + ssize_t offset = offsetNode->IconValue(); + + // TODO-RISCV64: not including the ldptr and SIMD offsets, which are not used right now. + if (!emitter::isValidSimm12(offset) || !emitter::isValidSimm12(offset + static_cast<ssize_t>(size))) + { + return; + } + + if (!IsSafeToContainMem(blkNode, addrParent, addr)) + { + return; + } + + BlockRange().Remove(offsetNode); + + addr->ChangeOper(GT_LEA); + addr->AsAddrMode()->SetIndex(nullptr); + addr->AsAddrMode()->SetScale(0); + addr->AsAddrMode()->SetOffset(static_cast<int>(offset)); + addr->SetContained(); +} + +//------------------------------------------------------------------------ +// LowerPutArgStkOrSplit: Lower a GT_PUTARG_STK/GT_PUTARG_SPLIT.
+// + // Arguments: + // putArgNode - The node to lower + // +void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode) +{ + GenTree* src = putArgNode->Data(); + + if (src->TypeIs(TYP_STRUCT)) + { + // STRUCT args (FIELD_LIST / OBJ) will always be contained. + MakeSrcContained(putArgNode, src); + + // Currently, codegen does not support LCL_VAR/LCL_FLD sources, so we morph them to OBJs. + // TODO-ADDR: support the local nodes in codegen and remove this code. + if (src->OperIsLocalRead()) + { + unsigned lclNum = src->AsLclVarCommon()->GetLclNum(); + ClassLayout* layout = nullptr; + GenTree* lclAddr = nullptr; + + if (src->OperIs(GT_LCL_VAR)) + { + layout = comp->lvaGetDesc(lclNum)->GetLayout(); + lclAddr = comp->gtNewLclVarAddrNode(lclNum); + + comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::IsStructArg)); + } + else + { + layout = src->AsLclFld()->GetLayout(); + lclAddr = comp->gtNewLclFldAddrNode(lclNum, src->AsLclFld()->GetLclOffs()); + } + + src->ChangeOper(GT_OBJ); + src->AsObj()->SetAddr(lclAddr); + src->AsObj()->SetLayout(layout); + src->AsObj()->gtBlkOpKind = GenTreeBlk::BlkOpKindInvalid; + src->AsObj()->gtBlkOpGcUnsafe = false; + + BlockRange().InsertBefore(src, lclAddr); + } + + // Codegen supports containment of local addresses under OBJs. + if (src->OperIs(GT_OBJ) && src->AsObj()->Addr()->OperIs(GT_LCL_VAR_ADDR)) + { + // TODO-RISCV64-CQ: support containment of LCL_FLD_ADDR too. + MakeSrcContained(src, src->AsObj()->Addr()); + } + } +} + +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. +// +// Notes: +// Casts from float/double to a smaller int type are transformed as follows: +// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) +// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) +// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16) +// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16) +// +// Note that for the overflow conversions we still depend on helper calls and +// don't expect to see them here. +// i) GT_CAST(float/double, int type with overflow detection) +// + +void Lowering::LowerCast(GenTree* tree) +{ + assert(tree->OperGet() == GT_CAST); + + JITDUMP("LowerCast for: "); + DISPNODE(tree); + JITDUMP("\n"); + + GenTree* op1 = tree->AsOp()->gtOp1; + var_types dstType = tree->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + + if (varTypeIsFloating(srcType)) + { + noway_assert(!tree->gtOverflow()); + assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small + // int. + } + + assert(!varTypeIsSmall(srcType)); + + // Now determine if we have operands that should be contained. + ContainCheckCast(tree->AsCast()); +} + +//------------------------------------------------------------------------ +// LowerRotate: Lower GT_ROL and GT_ROR nodes. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// +void Lowering::LowerRotate(GenTree* tree) +{ + ContainCheckShiftRotate(tree->AsOp()); +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node.
+// +void Lowering::LowerSIMD(GenTreeSIMD* simdNode) +{ + NYI_RISCV64("LowerSIMD"); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +{ + NYI_RISCV64("LowerHWIntrinsic"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::IsValidConstForMovImm: Determines if the given node can be replaced by a mov/fmov immediate instruction +// +// Arguments: +// node - The hardware intrinsic node. +// +// Returns: +// true if the node can be replaced by a mov/fmov immediate instruction; otherwise, false +// +// IMPORTANT: +// This check may end up modifying node->gtOp1 if it is a cast node that can be removed +bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) +{ + NYI_RISCV64("IsValidConstForMovImm"); + return false; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. +// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NYI_RISCV64("LowerHWIntrinsicCmpOp"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +{ + NYI_RISCV64("LowerHWIntrinsicCreate"); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicDot: Lowers a Vector64 or Vector128 Dot call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +{ + NYI_RISCV64("LowerHWIntrinsicDot"); +} + +#endif // FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// Containment analysis +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// ContainCheckCallOperands: Determine whether operands of a call should be contained. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::ContainCheckCallOperands(GenTreeCall* call) +{ + // There are no contained operands for RISCV64. +} + +//------------------------------------------------------------------------ +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) +{ + GenTree* src = node->Data(); + if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + { + // an integer zero for 'src' can be contained. + MakeSrcContained(node, src); + } + + ContainCheckIndir(node); +} + +//------------------------------------------------------------------------ +// ContainCheckIndir: Determine whether operands of an indir should be contained. 
+// +// Arguments: +// indirNode - The indirection node of interest +// +// Notes: +// This is called for both store and load indirections. +// +// Return Value: +// None. +// +void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +{ + // If this is the rhs of a block copy it will be handled when we handle the store. + if (indirNode->TypeGet() == TYP_STRUCT) + { + return; + } + +#ifdef FEATURE_SIMD + NYI_RISCV64("ContainCheckIndir-SIMD"); +#endif // FEATURE_SIMD + + GenTree* addr = indirNode->Addr(); + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) + { + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // These nodes go into an addr mode: + // - GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR is a stack addr mode. + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_CLS_VAR_ADDR)) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. + // make this contained, it turns into a constant that goes into an addr mode + MakeSrcContained(indirNode, addr); + } +} + +//------------------------------------------------------------------------ +// ContainCheckBinary: Determine whether a binary op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckBinary(GenTreeOp* node) +{ + // Check and make op2 contained (if it is a containable immediate) + CheckImmedAndMakeContained(node, node->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckMul: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckMul(GenTreeOp* node) +{ + ContainCheckBinary(node); +} + +//------------------------------------------------------------------------ +// ContainCheckDivOrMod: determine which operands of a div/mod should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckDivOrMod(GenTreeOp* node) +{ + assert(node->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); +} + +//------------------------------------------------------------------------ +// ContainCheckShiftRotate: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckShiftRotate(GenTreeOp* node) +{ + GenTree* shiftBy = node->gtOp2; + assert(node->OperIsShiftOrRotate()); + + if (shiftBy->IsCnsIntOrI()) + { + MakeSrcContained(node, shiftBy); + } +} + +//------------------------------------------------------------------------ +// ContainCheckStoreLoc: determine whether the source of a STORE_LCL* should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const +{ + assert(storeLoc->OperIsLocalStore()); + GenTree* op1 = storeLoc->gtGetOp1(); + + if (op1->OperIs(GT_BITCAST)) + { + // If we know that the source of the bitcast will be in a register, then we can make + // the bitcast itself contained. This will allow us to store directly from the other + // type if this node doesn't get a register. 
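(Aside, not part of the change: the GT_BITCAST handled just below reinterprets a value's bits in the other register file without any conversion; outside the JIT the same operation is a byte-wise copy, std::bit_cast in C++20. A small standalone illustration:)

#include <cstdint>
#include <cstdio>
#include <cstring>

// What a GT_BITCAST represents, expressed as plain C++: reinterpret the bits of
// a float as an integer without converting the value.
uint32_t FloatBits(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return bits;
}

int main()
{
    std::printf("0x%08x\n", FloatBits(1.0f)); // prints 0x3f800000
    return 0;
}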
+ GenTree* bitCastSrc = op1->gtGetOp1(); + if (!bitCastSrc->isContained() && !bitCastSrc->IsRegOptional()) + { + op1->SetContained(); + return; + } + } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + +#ifdef FEATURE_SIMD + if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12)) + { + // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. + if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister) + { + // For an InitBlk we want op1 to be contained + MakeSrcContained(storeLoc, op1); + } + return; + } +#endif // FEATURE_SIMD + + if (IsContainableImmed(storeLoc, op1)) + { + MakeSrcContained(storeLoc, op1); + } + + // If the source is a containable immediate, make it contained, unless it is + // an int-size or larger store of zero to memory, because we can generate smaller code + // by zeroing a register and then storing it. + var_types type = varDsc->GetRegisterType(storeLoc); + if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) + { + MakeSrcContained(storeLoc, op1); + } +} + +//------------------------------------------------------------------------ +// ContainCheckCast: determine whether the source of a CAST node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCast(GenTreeCast* node) +{ + // There are no contained operands for RISCV64. +} + +//------------------------------------------------------------------------ +// ContainCheckCompare: determine whether the sources of a compare node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCompare(GenTreeOp* cmp) +{ + CheckImmedAndMakeContained(cmp, cmp->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckSelect : determine whether the source of a select should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckSelect(GenTreeOp* node) +{ + noway_assert(!"GT_SELECT nodes are not supported on riscv64"); +} + +//------------------------------------------------------------------------ +// ContainCheckBoundsChk: determine whether any source of a bounds check node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) +{ + assert(node->OperIs(GT_BOUNDS_CHECK)); + if (!CheckImmedAndMakeContained(node, node->GetIndex())) + { + CheckImmedAndMakeContained(node, node->GetArrayLength()); + } +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) +{ + NYI_RISCV64("ContainCheckSIMD"); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) +{ + NYI_RISCV64("ContainCheckHWIntrinsic"); +} +#endif // FEATURE_HW_INTRINSICS + +#endif // TARGET_RISCV64 diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index cc7c52054625b5..092da56d6ff15f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -686,7 +686,7 @@ LinearScan::LinearScan(Compiler* theCompiler) enregisterLocalVars = compiler->compEnregLocals(); #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); -#elif TARGET_LOONGARCH64 +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); @@ -1606,7 +1606,7 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) // but if the variable is tracked the prolog generator would expect it to be in liveIn set, // so an assert in `genFnProlog` will fire. bool isRegCandidate = compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The LoongArch64's ABI which the float args within a struct maybe passed by integer register // when no float register left but free integer register. isRegCandidate &= !genIsValidFloatReg(varDsc->GetOtherArgReg()); @@ -2611,7 +2611,7 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Determine whether we need to reserve a register for large lclVar offsets. if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT)) { @@ -2621,7 +2621,7 @@ void LinearScan::setFrameType() JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); removeMask |= RBM_OPT_RSVD; } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0)) { @@ -2687,7 +2687,7 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* assert(refPosition->getInterval() == currentInterval); RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The LoongArch64's ABI which the float args maybe passed by integer register // when no float register left but free integer register. if ((candidates & allRegs(regType)) != RBM_NONE) @@ -7774,7 +7774,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Next, if this blocks ends with a JCMP, we have to make sure: // 1. Not to copy into the register that JCMP uses // e.g. 
JCMP w21, BRANCH @@ -7812,7 +7812,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } } -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; regMaskTP sameWriteRegs = RBM_NONE; @@ -7888,12 +7888,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) sameToReg = REG_NA; } -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex)) { sameToReg = REG_NA; } -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // If the var is live only at those blocks connected by a split edge and not live-in at some of the // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 0a067f4909af4a..3eee87b2c276a3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -773,6 +773,9 @@ class LinearScan : public LinearScanInterface #elif defined(TARGET_LOONGARCH64) static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +#elif defined(TARGET_RISCV64) + static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -1014,7 +1017,7 @@ class LinearScan : public LinearScanInterface bool isUse); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // For AMD64 on SystemV machines. This method // is called as replacement for raUpdateRegStateForArg // that is used on Windows. On System V systems a struct can be passed @@ -1023,7 +1026,7 @@ class LinearScan : public LinearScanInterface // For LoongArch64's ABI, a struct can be passed // partially using registers from the 2 register files. void UpdateRegStateForStructArg(LclVarDsc* argDsc); -#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 70dc5dc1648b20..75ff2c1f8836c7 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -597,7 +597,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The LoongArch64's ABI which the float args maybe passed by integer register // when no float register left but free integer register. 
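(Aside, not part of the change: the comment above describes the LoongArch64/RISC-V LP64D convention in which a floating-point argument normally travels in an FP argument register but falls back to a free integer register once the FP argument registers run out. A hypothetical standalone model of that classification, not JIT code:)

#include <cstdio>

enum class RegFile { Float, Integer, Stack };

// Where does the next floating-point argument go? An FP register if one is free,
// otherwise an integer register, otherwise the stack.
RegFile ClassifyFloatArg(int freeFloatRegs, int freeIntRegs)
{
    if (freeFloatRegs > 0) return RegFile::Float;
    if (freeIntRegs > 0)   return RegFile::Integer;
    return RegFile::Stack;
}

int main()
{
    std::printf("%d\n", static_cast<int>(ClassifyFloatArg(0, 3))); // 1: falls back to an integer register
    return 0;
}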
assert((regType(theInterval->registerType) == FloatRegisterType) || @@ -1988,7 +1988,7 @@ void LinearScan::insertZeroInitRefPositions() } } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ // UpdateRegStateForStructArg: // Sets the register state for an argument of type STRUCT. @@ -2036,7 +2036,7 @@ void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) } } -#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate @@ -2054,7 +2054,7 @@ void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) // void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // For SystemV-AMD64 and LoongArch64 calls the argDsc // can have 2 registers (for structs.). Handle them here. if (varTypeIsStruct(argDsc)) @@ -2062,7 +2062,7 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) UpdateRegStateForStructArg(argDsc); } else -#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp new file mode 100644 index 00000000000000..d26857f5f7c34a --- /dev/null +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -0,0 +1,1240 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Requirements for RISCV64 XX +XX XX +XX This encapsulates all the logic for setting register requirements for XX +XX the RISCV64 architecture. XX +XX XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_RISCV64 + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" + +//------------------------------------------------------------------------ +// BuildNode: Build the RefPositions for a node +// +// Arguments: +// treeNode - the node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Preconditions: +// LSRA Has been initialized. +// +// Postconditions: +// RefPositions have been built for all the register defs and uses required +// for this node. +// +int LinearScan::BuildNode(GenTree* tree) +{ + assert(!tree->isContained()); + int srcCount = 0; + int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; + regMaskTP killMask = RBM_NONE; + bool isLocalDefUse = false; + + // Reset the build-related members of LinearScan. + clearBuildState(); + + // Set the default dstCount. This may be modified below. 
+ if (tree->IsValue()) + { + dstCount = 1; + if (tree->IsUnusedValue()) + { + isLocalDefUse = true; + } + } + else + { + dstCount = 0; + } + + switch (tree->OperGet()) + { + default: + srcCount = BuildSimple(tree); + break; + + case GT_LCL_VAR: + // We make a final determination about whether a GT_LCL_VAR is a candidate or contained + // after liveness. In either case we don't build any uses or defs. Otherwise, this is a + // load of a stack-based local into a register and we'll fall through to the general + // local case below. + if (checkContainedOrCandidateLclVar(tree->AsLclVar())) + { + return 0; + } + FALLTHROUGH; + case GT_LCL_FLD: + { + srcCount = 0; +#ifdef FEATURE_SIMD + // Need an additional register to read upper 4 bytes of Vector3. + if (tree->TypeGet() == TYP_SIMD12) + { + // We need an internal register different from targetReg in which 'tree' produces its result + // because both targetReg and internal reg will be in use at the same time. + buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + } +#endif + BuildDef(tree); + } + break; + + case GT_STORE_LCL_VAR: + if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) + { + dstCount = compiler->lvaGetDesc(tree->AsLclVar())->lvFieldCnt; + } + FALLTHROUGH; + + case GT_STORE_LCL_FLD: + srcCount = BuildStoreLoc(tree->AsLclVarCommon()); + break; + + case GT_FIELD_LIST: + // These should always be contained. We don't correctly allocate or + // generate code for a non-contained GT_FIELD_LIST. + noway_assert(!"Non-contained GT_FIELD_LIST"); + srcCount = 0; + break; + + case GT_NO_OP: + case GT_START_NONGC: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_PROF_HOOK: + srcCount = 0; + assert(dstCount == 0); + killMask = getKillSetForProfilerHook(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_START_PREEMPTGC: + // This kills GC refs in callee save regs + srcCount = 0; + assert(dstCount == 0); + BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + break; + + case GT_CNS_DBL: + { + // There is no instruction for loading float/double imm directly into FPR. + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + FALLTHROUGH; + + case GT_CNS_INT: + { + srcCount = 0; + assert(dstCount == 1); + RefPosition* def = BuildDef(tree); + def->getInterval()->isConstant = true; + } + break; + + case GT_BOX: + case GT_COMMA: + case GT_QMARK: + case GT_COLON: + case GT_ADDR: + srcCount = 0; + assert(dstCount == 0); + unreached(); + break; + + case GT_RETURN: + srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_RETFILT: + assert(dstCount == 0); + if (tree->TypeGet() == TYP_VOID) + { + srcCount = 0; + } + else + { + assert(tree->TypeGet() == TYP_INT); + srcCount = 1; + BuildUse(tree->gtGetOp1(), RBM_INTRET); + } + break; + + case GT_NOP: + // A GT_NOP is either a passthrough (if it is void, or if it has + // a child), but must be considered to produce a dummy value if it + // has a type but no child. 
+ srcCount = 0; + if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) + { + assert(dstCount == 1); + BuildDef(tree); + } + else + { + assert(dstCount == 0); + } + break; + + case GT_KEEPALIVE: + assert(dstCount == 0); + srcCount = BuildOperandUses(tree->gtGetOp1()); + break; + + case GT_JTRUE: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_JMP: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_SWITCH: + // This should never occur since switch nodes must not be visible at this + // point in the JIT. + srcCount = 0; + noway_assert(!"Switch must be lowered at this point"); + break; + + case GT_JMPTABLE: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_SWITCH_TABLE: + buildInternalIntRegisterDefForNode(tree); + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 0); + break; + + case GT_ASG: + noway_assert(!"We should never hit any assignment operator in lowering"); + srcCount = 0; + break; + + case GT_ADD: + case GT_SUB: + if (varTypeIsFloating(tree->TypeGet())) + { + // overflow operations aren't supported on float/double types. + assert(!tree->gtOverflow()); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); + } + else if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_AND: + case GT_AND_NOT: + case GT_OR: + case GT_XOR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_RETURNTRAP: + // this just turns into a compare of its child with an int + // + a conditional call + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 0); + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_MUL: + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_MULHI: + case GT_UDIV: + { + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + } + break; + + case GT_INTRINSIC: + { + noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); + + // Both operand and its result must be of the same floating point type. 
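(Aside, not part of the change: the GT_ADD/GT_SUB/GT_MUL cases above reserve an internal register for overflow-checked arithmetic because the check needs an operand, or an intermediate, to stay live alongside the result. The arithmetic behind such a check, shown as plain C++ rather than JIT code:)

#include <cassert>
#include <cstdint>

// Signed addition overflowed iff both operands have the same sign and the
// result's sign differs from it; the wrap-around sum is computed unsigned.
bool AddOverflows(int64_t a, int64_t b, int64_t* result)
{
    uint64_t wrapped = static_cast<uint64_t>(a) + static_cast<uint64_t>(b);
    int64_t  r       = static_cast<int64_t>(wrapped);
    *result          = r;
    return ((a ^ r) & (b ^ r)) < 0;
}

int main()
{
    int64_t r;
    assert(!AddOverflows(1, 2, &r) && r == 3);
    assert(AddOverflows(INT64_MAX, 1, &r));
    assert(AddOverflows(INT64_MIN, -1, &r));
    return 0;
}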
+ GenTree* op1 = tree->gtGetOp1(); + assert(varTypeIsFloating(op1)); + assert(op1->TypeGet() == tree->TypeGet()); + + BuildUse(op1); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + } + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + srcCount = BuildSIMD(tree->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_CAST: + assert(dstCount == 1); + srcCount = BuildCast(tree->AsCast()); + break; + + case GT_NEG: + case GT_NOT: + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_JCMP: + srcCount = BuildCmp(tree); + break; + + case GT_CKFINITE: + srcCount = 1; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + BuildUse(tree->gtGetOp1()); + BuildDef(tree); + buildInternalRegisterUses(); + break; + + case GT_CMPXCHG: + { + NYI_RISCV64("-----unimplemented on RISCV64 yet----"); + } + break; + + case GT_LOCKADD: + case GT_XORR: + case GT_XAND: + case GT_XADD: + case GT_XCHG: + { + NYI_RISCV64("-----unimplemented on RISCV64 yet----"); + } + break; + + case GT_PUTARG_SPLIT: + srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); + dstCount = tree->AsPutArgSplit()->gtNumRegs; + break; + + case GT_PUTARG_STK: + srcCount = BuildPutArgStk(tree->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + srcCount = BuildPutArgReg(tree->AsUnOp()); + break; + + case GT_CALL: + srcCount = BuildCall(tree->AsCall()); + if (tree->AsCall()->HasMultiRegRetVal()) + { + dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); + } + break; + + case GT_BLK: + // These should all be eliminated prior to Lowering. + assert(!"Non-store block node in Lowering"); + srcCount = 0; + break; + + case GT_STORE_BLK: + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + srcCount = BuildBlockStore(tree->AsBlk()); + break; + + case GT_INIT_VAL: + // Always a passthrough of its child's value. + assert(!"INIT_VAL should always be contained"); + srcCount = 0; + break; + + case GT_LCLHEAP: + { + assert(dstCount == 1); + + // Need a variable number of temp regs (see genLclHeap() in codegenloongarch64.cpp): + // Here '-' means don't care. + // + // Size? Init Memory? # temp regs + // 0 - 0 + // const and <=UnrollLimit - 0 + // const and UnrollLimit Yes 0 + // Non-const Yes 0 + // Non-const No 2 + // + + GenTree* size = tree->gtGetOp1(); + if (size->IsCnsIntOrI()) + { + assert(size->isContained()); + srcCount = 0; + + size_t sizeVal = size->AsIntCon()->gtIconVal; + + if (sizeVal != 0) + { + // Compute the amount of memory to properly STACK_ALIGN. + // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. + // This should also help in debugging as we can examine the original size specified with + // localloc. + sizeVal = AlignUp(sizeVal, STACK_ALIGN); + + // For small allocations up to 4 'st' instructions (i.e. 16 to 64 bytes of localloc) + if (sizeVal <= (REGSIZE_BYTES * 2 * 4)) + { + // Need no internal registers + } + else if (!compiler->info.compInitMem) + { + // No need to initialize allocated stack space. 
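(Aside, not part of the change: the localloc handling above rounds the requested size up with AlignUp(sizeVal, STACK_ALIGN), since the RV64 stack must stay 16-byte aligned. The rounding itself is the usual power-of-two trick, shown standalone with an illustrative name:)

#include <cassert>
#include <cstddef>

// Round value up to the next multiple of a power-of-two alignment.
constexpr size_t AlignUpTo(size_t value, size_t alignment)
{
    return (value + alignment - 1) & ~(alignment - 1);
}

int main()
{
    assert(AlignUpTo(1, 16) == 16);
    assert(AlignUpTo(16, 16) == 16);
    assert(AlignUpTo(17, 16) == 32);
    return 0;
}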
+ if (sizeVal < compiler->eeGetPageSize()) + { + // Need no internal registers + } + else + { + // We need two registers: regCnt and RegTmp + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + } + } + else + { + srcCount = 1; + if (!compiler->info.compInitMem) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + + if (!size->isContained()) + { + BuildUse(size); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_BOUNDS_CHECK: + { + GenTreeBoundsChk* node = tree->AsBoundsChk(); + // Consumes arrLen & index - has no result + assert(dstCount == 0); + srcCount = BuildOperandUses(node->GetIndex()); + srcCount += BuildOperandUses(node->GetArrayLength()); + } + break; + + case GT_ARR_ELEM: + // These must have been lowered to GT_ARR_INDEX + noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_ARR_INDEX: + { + srcCount = 2; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + + // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple + // times while the result is being computed. + RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); + setDelayFree(arrObjUse); + BuildUse(tree->AsArrIndex()->IndexExpr()); + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_OFFSET: + // This consumes the offset, if any, the arrObj and the effective index, + // and produces the flattened offset for this dimension. + srcCount = 2; + if (!tree->AsArrOffs()->gtOffset->isContained()) + { + BuildUse(tree->AsArrOffs()->gtOffset); + srcCount++; + } + BuildUse(tree->AsArrOffs()->gtIndex); + BuildUse(tree->AsArrOffs()->gtArrObj); + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_LEA: + { + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + int cns = lea->Offset(); + + // This LEA is instantiating an address, so we set up the srcCount here. + srcCount = 0; + if (base != nullptr) + { + srcCount++; + BuildUse(base); + } + if (index != nullptr) + { + srcCount++; + BuildUse(index); + } + assert(dstCount == 1); + + // On RISCV64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // RISCV64 does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + else if (!emitter::isValidSimm12(cns)) + { + // This offset can't be contained in the add instruction, so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_STOREIND: + { + assert(dstCount == 0); + + if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) + { + srcCount = BuildGCWriteBarrier(tree); + break; + } + + srcCount = BuildIndir(tree->AsIndir()); + if (!tree->gtGetOp2()->isContained()) + { + BuildUse(tree->gtGetOp2()); + srcCount++; + } + } + break; + + case GT_NULLCHECK: + case GT_IND: + assert(dstCount == (tree->OperIs(GT_NULLCHECK) ? 
0 : 1)); + srcCount = BuildIndir(tree->AsIndir()); + break; + + case GT_CATCH_ARG: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree, RBM_EXCEPTION_OBJECT); + break; + + case GT_INDEX_ADDR: + assert(dstCount == 1); + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + } // end switch (tree->OperGet()) + + if (tree->IsUnusedValue() && (dstCount != 0)) + { + isLocalDefUse = true; + } + // We need to be sure that we've set srcCount and dstCount appropriately + assert((dstCount < 2) || tree->IsMultiRegNode()); + assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); + assert(!tree->IsUnusedValue() || (dstCount != 0)); + assert(dstCount == tree->GetRegisterDstCount(compiler)); + return srcCount; +} + +#ifdef FEATURE_SIMD +//------------------------------------------------------------------------ +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. +// +// Arguments: +// tree - The GT_SIMD node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) +{ + NYI_RISCV64("-----unimplemented on RISCV64 yet----"); + return 0; +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +//------------------------------------------------------------------------ +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. +// +// Arguments: +// tree - The GT_HWINTRINSIC node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) +{ + NYI_RISCV64("-----unimplemented on RISCV64 yet----"); + return 0; +} +#endif + +//------------------------------------------------------------------------ +// BuildIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND or block gentree node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildIndir(GenTreeIndir* indirTree) +{ + // struct typed indirs are expected only on rhs of a block copy, + // but in this case they must be contained. + assert(indirTree->TypeGet() != TYP_STRUCT); + + GenTree* addr = indirTree->Addr(); + GenTree* index = nullptr; + int cns = 0; + + if (addr->isContained()) + { + if (addr->OperGet() == GT_LEA) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + index = lea->Index(); + cns = lea->Offset(); + + // On RISCV64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // RISCV64 does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + else if (!emitter::isValidSimm12(cns)) + { + // This offset can't be contained in the load/store instruction, so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + } + else if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(indirTree); + } + } + +#ifdef FEATURE_SIMD + if (indirTree->TypeGet() == TYP_SIMD12) + { + // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). + assert(!addr->isContained()); + + // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
+ // To assemble the vector properly we would need an additional int register + buildInternalIntRegisterDefForNode(indirTree); + } +#endif // FEATURE_SIMD + + int srcCount = BuildIndirUses(indirTree); + buildInternalRegisterUses(); + + if (!indirTree->OperIs(GT_STOREIND, GT_NULLCHECK)) + { + BuildDef(indirTree); + } + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildCall(GenTreeCall* call) +{ + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; + + int srcCount = 0; + int dstCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + dstCount = 1; + } + } + + GenTree* ctrlExpr = call->gtControlExpr; + regMaskTP ctrlExprCandidates = RBM_NONE; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + // Fast tail call - make sure that call target is always computed in volatile registers + // that will not be overridden by epilog sequence. + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(ctrlExprCandidates != RBM_NONE); + } + } + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM + // and will load call address into the temp register from this register. + regMaskTP candidates = RBM_NONE; + if (call->IsFastTailCall()) + { + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(candidates != RBM_NONE); + } + + buildInternalIntRegisterDefForNode(call, candidates); + } + + RegisterType registerType = call->TypeGet(); + + // Set destination candidates for return value of the call. + + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + dstCandidates = retTypeDesc->GetABIReturnRegs(); + } + else if (varTypeUsesFloatArgReg(registerType)) + { + dstCandidates = RBM_FLOATRET; + } + else if (registerType == TYP_LONG) + { + dstCandidates = RBM_LNGRET; + } + else + { + dstCandidates = RBM_INTRET; + } + + // First, count reg args + // Each register argument corresponds to one source. + bool callHasFloatRegArgs = false; + + for (CallArg& arg : call->gtArgs.LateArgs()) + { + CallArgABIInformation& abiInfo = arg.AbiInfo; + GenTree* argNode = arg.GetLateNode(); + +#ifdef DEBUG + regNumber argReg = abiInfo.GetRegNum(); +#endif + + if (argNode->gtOper == GT_PUTARG_STK) + { + // late arg that is not passed in a register + assert(abiInfo.GetRegNum() == REG_STK); + // These should never be contained. 
+ assert(!argNode->isContained()); + continue; + } + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (argNode->OperGet() == GT_FIELD_LIST) + { + assert(argNode->isContained()); + + // There could be up to 2 PUTARG_REGs in the list. + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { +#ifdef DEBUG + assert(use.GetNode()->OperIs(GT_PUTARG_REG)); +#endif + BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + srcCount++; + } + } + else if (argNode->OperGet() == GT_PUTARG_SPLIT) + { + unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; + assert(regCount == abiInfo.NumRegs); + for (unsigned int i = 0; i < regCount; i++) + { + BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + } + srcCount += regCount; + } + else + { + assert(argNode->OperIs(GT_PUTARG_REG)); + assert(argNode->GetRegNum() == argReg); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + { + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + srcCount++; + } + } + } + +#ifdef DEBUG + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. + + for (CallArg& arg : call->gtArgs.EarlyArgs()) + { + GenTree* argNode = arg.GetEarlyNode(); + + // Skip arguments that have been moved to the Late Arg list + if (arg.GetLateNode() == nullptr) + { + // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they + // define registers used by the call. + assert(argNode->OperGet() != GT_PUTARG_SPLIT); + if (argNode->gtOper == GT_PUTARG_STK) + { + assert(arg.AbiInfo.GetRegNum() == REG_STK); + } + else + { + assert(!argNode->IsValue() || argNode->IsUnusedValue()); + } + } + } +#endif // DEBUG + + // If it is a fast tail call, the call target has already been restricted to volatile registers above. + // Therefore, there is no need to set source candidates on the call target again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by the RISCV64 ABI. + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + } + + if (ctrlExpr != nullptr) + { + BuildUse(ctrlExpr, ctrlExprCandidates); + srcCount++; + } + + buildInternalRegisterUses(); + + // Now generate defs and kills. + regMaskTP killMask = getKillSetForCall(call); + BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + + // No args are placed in registers anymore. + placedArgRegs = RBM_NONE; + numPlacedArgLocals = 0; + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// The number of sources consumed by this node.
+// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTree* putArgChild = argNode->gtGetOp1(); + + int srcCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperIs(GT_FIELD_LIST)) + { + assert(putArgChild->isContained()); + // We consume all of the items in the GT_FIELD_LIST + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + BuildUse(use.GetNode()); + srcCount++; + } + } + else + { + // We can use a ld/st sequence so we need two internal registers for RISCV64. + buildInternalIntRegisterDefForNode(argNode); + buildInternalIntRegisterDefForNode(argNode); + + if (putArgChild->OperGet() == GT_OBJ) + { + assert(putArgChild->isContained()); + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation, and there are no source registers. + // + assert(objChild->isContained()); + } + else + { + // We will generate all of the code for the GT_PUTARG_STK and its child node + // as one contained operation + // + srcCount = BuildOperandUses(objChild); + } + } + else + { + // No source registers. + assert(putArgChild->OperIs(GT_LCL_VAR)); + } + } + } + else + { + assert(!putArgChild->isContained()); + srcCount = BuildOperandUses(putArgChild); + } + buildInternalRegisterUses(); + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// +// Arguments: +// argNode - a GT_PUTARG_SPLIT node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained +// +int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + return 0; +} + +//------------------------------------------------------------------------ +// BuildBlockStore: Build the RefPositions for a block store node. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + GenTree* srcAddrOrFill = nullptr; + + regMaskTP dstAddrRegMask = RBM_NONE; + regMaskTP srcRegMask = RBM_NONE; + regMaskTP sizeRegMask = RBM_NONE; + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->AsUnOp()->gtGetOp1(); + } + + srcAddrOrFill = src; + + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + { + if (dstAddr->isContained()) + { + // Since the dstAddr is contained the address will be computed in CodeGen. + // This might require an integer register to store the value. + buildInternalIntRegisterDefForNode(blkNode); + } + + const bool isDstRegAddrAlignmentKnown = dstAddr->OperIsLocalAddr(); + + if (isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES)) + { + // TODO-RISCV64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions.
+ // here just used a temp register. + buildInternalIntRegisterDefForNode(blkNode); + } + } + break; + + case GenTreeBlk::BlkOpKindHelper: + assert(!src->isContained()); + dstAddrRegMask = RBM_ARG_0; + srcRegMask = RBM_ARG_1; + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + else + { + if (src->OperIs(GT_IND)) + { + assert(src->isContained()); + srcAddrOrFill = src->AsIndir()->Addr(); + } + + if (blkNode->OperIs(GT_STORE_OBJ)) + { + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. + // We can't use the special Write Barrier registers, so exclude them from the mask + regMaskTP internalIntCandidates = + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + + if (size >= 2 * REGSIZE_BYTES) + { + // TODO-RISCV64: We will use ld/st paired to reduce code size and improve performance + // so we need to reserve an extra internal register. + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + } + + // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + } + } + else + { + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + buildInternalIntRegisterDefForNode(blkNode); + break; + + case GenTreeBlk::BlkOpKindHelper: + dstAddrRegMask = RBM_ARG_0; + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_ARG_1; + } + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE)) + { + // Reserve a temp register for the block size argument. + buildInternalIntRegisterDefForNode(blkNode, sizeRegMask); + } + + int useCount = 0; + + if (!dstAddr->isContained()) + { + useCount++; + BuildUse(dstAddr, dstAddrRegMask); + } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr->AsAddrMode()->Base()); + } + + if (srcAddrOrFill != nullptr) + { + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill->AsAddrMode()->Base()); + } + } + + if (blkNode->OperIs(GT_STORE_DYN_BLK)) + { + useCount++; + BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask); + } + + buildInternalRegisterUses(); + regMaskTP killMask = getKillSetForBlockStore(blkNode); + BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + return useCount; +} + +//------------------------------------------------------------------------ +// BuildCast: Set the NodeInfo for a GT_CAST. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildCast(GenTreeCast* cast) +{ + int srcCount = BuildOperandUses(cast->CastOp()); + BuildDef(cast); + + return srcCount; +} + +#endif // TARGET_RISCV64 diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a34552aa1f1972..1a9ab33fdf2955 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -298,7 +298,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Arm64: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() @@ -334,7 +334,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (!tree->gtOverflow()) { // ARM64 and LoongArch64 optimize all non-overflow checking conversions -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return nullptr; #else switch (dstType) @@ -362,7 +362,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) default: unreached(); } -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 } else { @@ -1725,8 +1725,8 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) if (setupArg->OperIsCopyBlkOp()) { setupArg = comp->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" // for "struct { float, float }", and retyping to a primitive here will cause the // multi-reg morphing to not kick in (the struct in question needs to be passed in @@ -1745,7 +1745,7 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) scalarType = comp->getPrimitiveTypeForStruct(structSize, clsHnd, m_isVarArgs); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => @@ -2342,7 +2342,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call passUsingFloatRegs = false; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) assert(!callIsVararg && !isHfaArg); passUsingFloatRegs = varTypeUsesFloatReg(argSigType); @@ -2395,7 +2395,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (isStructArg) { if (isHfaArg) @@ -2452,7 +2452,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call Compiler::structPassingKind howToPassStruct; structBaseType = comp->getArgTypeForStruct(argSigClass, &howToPassStruct, callIsVararg, structSize); passStructByRef = (howToPassStruct == Compiler::SPK_ByReference); -#if defined(TARGET_LOONGARCH64) +#if 
defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (!passStructByRef) { assert((howToPassStruct == Compiler::SPK_ByValue) || (howToPassStruct == Compiler::SPK_PrimitiveType)); @@ -2530,7 +2530,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call argAlignBytes = comp->eeGetArgSizeAlignment(argSigType, isFloatHfa); } -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) regNumber nextOtherRegNum = REG_STK; #endif // @@ -2627,7 +2627,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (passUsingFloatRegs) { // Check if the last register needed is still in the fp argument register range. @@ -2694,7 +2694,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call nextOtherRegNum = REG_STK; } } -#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 +#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 or TARGET_RISCV64 #if defined(UNIX_AMD64_ABI) @@ -2856,7 +2856,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } #endif -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) assert(size <= 2); if (size == 2) @@ -2872,7 +2872,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // Set up the next intArgRegNum and fltArgRegNum values. if (!isBackFilled) { -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Increment intArgRegNum by 'size' registers if (nonStdRegNum == REG_NA) { @@ -2983,7 +2983,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } } } -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } } else // We have an argument that is not passed in a register @@ -3283,12 +3283,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if ((passingSize != structSize) && !argIsLocal) { makeOutArgCopy = true; } -#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) } else if (argObj->TypeGet() != structBaseType) { @@ -3589,7 +3589,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) GenTree* argNode = arg->GetNode(); assert(varTypeIsStruct(argNode)); -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif @@ -3678,7 +3678,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) offset += 8; } else -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if ((arg->AbiInfo.StructFloatFieldType[inx] != TYP_UNDEF) && !varTypeIsGC(getSlotType(offset / TARGET_POINTER_SIZE))) { @@ -3686,7 +3686,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) offset += (structSize > TARGET_POINTER_SIZE) ? 
8 : 4; } else -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 { elems[inx].Type = getSlotType(inx); offset += TARGET_POINTER_SIZE; @@ -3713,11 +3713,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) case 2: lastElem.Type = TYP_SHORT; break; -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case 4: lastElem.Type = TYP_INT; break; -#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) || (TARGET_LOONGARCH64) +#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) || (TARGET_LOONGARCH64) || (TARGET_RISCV64) default: noway_assert(!"Cannot load odd sized last element from arbitrary source"); break; @@ -3739,7 +3739,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) loadExtent = elems[idx].Offset + genTypeSize(elems[idx].Type); } -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // For LoongArch64's ABI, the struct {long a; float b;} may be passed // by integer and float registers and it needs to include the padding here. assert(roundUp(structSize, TARGET_POINTER_SIZE) == roundUp(loadExtent, TARGET_POINTER_SIZE)); @@ -3752,7 +3752,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { assert(loadExtent == structSize); } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 #endif // DEBUG // We should still have a TYP_STRUCT @@ -3871,7 +3871,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) newArg->AddField(this, argIndir, offset, argIndir->TypeGet()); } -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) // Make sure we loaded exactly the required amount of bytes. // But for LoongArch64's ABI, the struct {long a; float b;} may be passed // by integer and float registers and it needs to include the padding here. @@ -9652,9 +9652,9 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA break; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_DIV: -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_MOD: #endif if (!varTypeIsFloating(tree->gtType)) @@ -9673,7 +9673,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA } break; case GT_UDIV: -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_UMOD: #endif // We do not need to throw if the second operand is a non-zero constant. 
@@ -9683,7 +9683,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA } break; -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_ADD: diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index ca90673e85adfe..e8a5afeed08ede 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -126,6 +126,9 @@ REGDEF(STK, 32+XMMBASE, 0x0000, "STK" ) #elif defined(TARGET_LOONGARCH64) #include "registerloongarch64.h" +#elif defined(TARGET_RISCV64) + #include "registerriscv64.h" + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/registerriscv64.h b/src/coreclr/jit/registerriscv64.h new file mode 100644 index 00000000000000..fea2e3cf5e1a3a --- /dev/null +++ b/src/coreclr/jit/registerriscv64.h @@ -0,0 +1,106 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// clang-format off + +/*****************************************************************************/ +/*****************************************************************************/ +#ifndef REGDEF +#error Must define REGDEF macro before including this file +#endif +#ifndef REGALIAS +#define REGALIAS(alias, realname) +#endif + +#define RMASK(x) (1ULL << (x)) + +/* +REGDEF(name, rnum, mask, sname) */ +REGDEF(R0, 0, 0x0001, "zero") +REGDEF(RA, 1, 0x0002, "ra" ) +REGDEF(SP, 2, 0x0004, "sp" ) +REGDEF(GP, 3, 0x0008, "gp" ) +REGDEF(TP, 4, 0x0010, "tp" ) +REGDEF(T0, 5, 0x0020, "t0" ) +REGDEF(T1, 6, 0x0040, "t1" ) +REGDEF(T2, 7, 0x0080, "t2" ) +REGDEF(FP, 8, 0x0100, "fp" ) +REGDEF(S1, 9, 0x0200, "s1" ) +REGDEF(A0, 10, 0x0400, "a0" ) +REGDEF(A1, 11, 0x0800, "a1" ) +REGDEF(A2, 12, 0x1000, "a2" ) +REGDEF(A3, 13, 0x2000, "a3" ) +REGDEF(A4, 14, 0x4000, "a4" ) +REGDEF(A5, 15, 0x8000, "a5" ) +REGDEF(A6, 16, 0x10000, "a6" ) +REGDEF(A7, 17, 0x20000, "a7" ) +REGDEF(S2, 18, 0x40000, "s2" ) +REGDEF(S3, 19, 0x80000, "s3" ) +REGDEF(S4, 20, 0x100000, "s4" ) +REGDEF(S5, 21, 0x200000, "s5" ) +REGDEF(S6, 22, 0x400000, "s6" ) +REGDEF(S7, 23, 0x800000, "s7" ) +REGDEF(S8, 24, 0x1000000, "s8" ) +REGDEF(S9, 25, 0x2000000, "s9" ) +REGDEF(S10, 26, 0x4000000, "s10" ) +REGDEF(S11, 27, 0x8000000, "s11" ) +REGDEF(T3, 28, 0x10000000, "t3" ) +REGDEF(T4, 29, 0x20000000, "t4" ) +REGDEF(T5, 30, 0x40000000, "t5" ) +REGDEF(T6, 31, 0x80000000, "t6" ) + +REGALIAS(R8, FP) + +#define FBASE 32 +#define FMASK(x) (1ULL << (FBASE+(x))) + +/* +REGDEF(name, rnum, mask, sname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22") +REGDEF(F23, 
23+FBASE, FMASK(23), "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31") + +// The registers with values 64 (NBASE) and above are not real register numbers +#define NBASE 64 + +REGDEF(STK, 0+NBASE, 0x0000, "STK") + +/*****************************************************************************/ +#undef RMASK +#undef VMASK +#undef VBASE +#undef NBASE +#undef REGDEF +#undef REGALIAS +/*****************************************************************************/ + +// clang-format on diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index 9c1a1041eecf87..dc0225c74db7c5 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,7 +123,7 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog #endif // TARGET_ARMARCH || TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 8baf645453adf5..7225b29b00cc43 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -58,6 +58,8 @@ inline bool compUnixX86Abi() #define TARGET_READABLE_NAME "ARM64" #elif defined(TARGET_LOONGARCH64) #define TARGET_READABLE_NAME "LOONGARCH64" +#elif defined(TARGET_RISCV64) +#define TARGET_READABLE_NAME "RISCV64" #else #error Unsupported or unset target architecture #endif @@ -85,6 +87,10 @@ inline bool compUnixX86Abi() #define REGMASK_BITS 64 #define CSE_CONST_SHARED_LOW_BITS 12 +#elif defined(TARGET_RISCV64) +#define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 + #else #error Unsupported or unset target architecture #endif @@ -100,7 +106,7 @@ inline bool compUnixX86Abi() // be assigned during register allocation. // REG_NA - Used to indicate that a register is either not yet assigned or not required. // -#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) enum _regNumber_enum : unsigned { #define REGDEF(name, rnum, mask, sname) REG_##name = rnum, @@ -208,7 +214,7 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. 
-#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; #else typedef unsigned regMaskTP; @@ -262,6 +268,8 @@ typedef unsigned char regNumberSmall; #include "targetarm64.h" #elif defined(TARGET_LOONGARCH64) #include "targetloongarch64.h" +#elif defined(TARGET_RISCV64) +#include "targetriscv64.h" #else #error Unsupported or unset target architecture #endif @@ -559,7 +567,7 @@ inline regMaskTP genRegMask(regNumber reg) inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) assert(genIsValidFloatReg(reg)); assert((unsigned)reg < ArrLen(regMasks)); return regMasks[reg]; diff --git a/src/coreclr/jit/targetriscv64.cpp b/src/coreclr/jit/targetriscv64.cpp new file mode 100644 index 00000000000000..5c51f66f83c402 --- /dev/null +++ b/src/coreclr/jit/targetriscv64.cpp @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*****************************************************************************/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_RISCV64) + +#include "target.h" + +const char* Target::g_tgtCPUName = "riscv64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; + +// clang-format off +const regNumber intArgRegs [] = {REG_A0, REG_A1, REG_A2, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7}; +const regMaskTP intArgMasks[] = {RBM_A0, RBM_A1, RBM_A2, RBM_A3, RBM_A4, RBM_A5, RBM_A6, RBM_A7}; + +const regNumber fltArgRegs [] = {REG_FLTARG_0, REG_FLTARG_1, REG_FLTARG_2, REG_FLTARG_3, REG_FLTARG_4, REG_FLTARG_5, REG_FLTARG_6, REG_FLTARG_7 }; +const regMaskTP fltArgMasks[] = {RBM_FLTARG_0, RBM_FLTARG_1, RBM_FLTARG_2, RBM_FLTARG_3, RBM_FLTARG_4, RBM_FLTARG_5, RBM_FLTARG_6, RBM_FLTARG_7 }; +// clang-format on + +#endif // TARGET_RISCV64 diff --git a/src/coreclr/jit/targetriscv64.h b/src/coreclr/jit/targetriscv64.h new file mode 100644 index 00000000000000..d2bc193404cea7 --- /dev/null +++ b/src/coreclr/jit/targetriscv64.h @@ -0,0 +1,306 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +#if !defined(TARGET_RISCV64) +#error The file should not be included for this platform. 
+#endif + +// clang-format off + #define CPU_LOAD_STORE_ARCH 1 + #define CPU_HAS_FP_SUPPORT 1 + #define ROUND_FLOAT 0 // Do not round intermed float expression results + #define CPU_HAS_BYTE_REGS 0 + + #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk + #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll InitBlk + +#ifdef FEATURE_SIMD +#pragma error("SIMD Unimplemented yet RISCV64") +#endif // FEATURE_SIMD + + #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog + #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers + #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp + #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. + #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_IMPLICIT_BYREFS 1 // Support for struct parameters passed via pointers to shadow copies + #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register + #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register + #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (8-byte vector). + #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register + #define MAX_RET_MULTIREG_BYTES 16 // Maximum size of a struct that could be returned in more than one register (Max is an HFA or 2 doubles) + #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers. + #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. + #define MAX_MULTIREG_COUNT 2 // Maximum number of registers defined by a single instruction (including calls). + // This is also the maximum number of registers for a MultiReg node. + + #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers + #define USER_ARGS_COME_LAST 1 + #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. + #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target + #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. + #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses. 
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods + #define CSE_CONSTS 1 // Enable if we want to CSE constants + + #define REG_FP_FIRST REG_F0 + #define REG_FP_LAST REG_F31 + #define FIRST_FP_ARGREG REG_F10 + #define LAST_FP_ARGREG REG_F17 + + #define REGNUM_BITS 6 // number of bits in a REG_* + #define REGSIZE_BYTES 8 // number of bytes in one general purpose register + #define FP_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register + #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers + + #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call. + + #define CODE_ALIGN 4 // code alignment requirement + #define STACK_ALIGN 16 // stack alignment requirement + + #define RBM_INT_CALLEE_SAVED (RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8|RBM_S9|RBM_S10|RBM_S11) + #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6) + #define RBM_FLT_CALLEE_SAVED (RBM_F8|RBM_F9|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23|RBM_F24|RBM_F25|RBM_F26|RBM_F27) + #define RBM_FLT_CALLEE_TRASH (RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15|RBM_F16|RBM_F17) + + #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) + #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) + + #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 + #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 + + #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) + #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) + #define RBM_ALLDOUBLE RBM_ALLFLOAT + + // REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED + #define REG_VAR_ORDER REG_A0,REG_A1,REG_A2,REG_A3,REG_A4,REG_A5,REG_A6,REG_A7, \ + REG_T0,REG_T1,REG_T2,REG_T3,REG_T4,REG_T5,REG_T6, \ + REG_CALLEE_SAVED_ORDER + + #define REG_VAR_ORDER_FLT REG_F12,REG_F13,REG_F14,REG_F15,REG_F16,REG_F17,REG_F18,REG_F19, \ + REG_F2,REG_F3,REG_F4,REG_F5,REG_F6,REG_F7,REG_F8,REG_F9,REG_F10, \ + REG_F20,REG_F21,REG_F22,REG_F23, \ + REG_F24,REG_F25,REG_F26,REG_F27,REG_F28,REG_F29,REG_F30,REG_F31, \ + REG_F1,REG_F0 + + #define REG_CALLEE_SAVED_ORDER REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8,REG_S9,REG_S10,REG_S11 + #define RBM_CALLEE_SAVED_ORDER RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8,RBM_S9,RBM_S10,RBM_S11 + + #define CNT_CALLEE_SAVED (11) + #define CNT_CALLEE_TRASH (15) + #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) + + #define CNT_CALLEE_SAVED_FLOAT (12) + #define CNT_CALLEE_TRASH_FLOAT (20) + + #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES) + #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES) + + #define REG_TMP_0 REG_T0 + + // Temporary registers used for the GS cookie check. + #define REG_GSCOOKIE_TMP_0 REG_T0 + #define REG_GSCOOKIE_TMP_1 REG_T1 + + // register to hold shift amount; no special register is required on ARM64. 
+ #define REG_SHIFT REG_NA + #define RBM_SHIFT RBM_ALLINT + + // This is a general scratch register that does not conflict with the argument registers + #define REG_SCRATCH REG_T0 + + // This is a float scratch register that does not conflict with the argument registers + #define REG_SCRATCH_FLT REG_F28 + + // This is a general register that can be optionally reserved for other purposes during codegen + #define REG_OPT_RSVD REG_T1 + #define RBM_OPT_RSVD RBM_T1 + + // Where is the exception object on entry to the handler block? + #define REG_EXCEPTION_OBJECT REG_A0 + #define RBM_EXCEPTION_OBJECT RBM_A0 + + #define REG_JUMP_THUNK_PARAM REG_T2 + #define RBM_JUMP_THUNK_PARAM RBM_T2 + + #define REG_WRITE_BARRIER_DST REG_T3 + #define RBM_WRITE_BARRIER_DST RBM_T3 + + #define REG_WRITE_BARRIER_SRC REG_T4 + #define RBM_WRITE_BARRIER_SRC RBM_T4 + + #define REG_WRITE_BARRIER_DST_BYREF REG_T3 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_T3 + + #define REG_WRITE_BARRIER_SRC_BYREF REG_T5 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T5 + + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that x13 and x14 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC + + // GenericPInvokeCalliHelper VASigCookie Parameter + #define REG_PINVOKE_COOKIE_PARAM REG_T0 + #define RBM_PINVOKE_COOKIE_PARAM RBM_T0 + + // GenericPInvokeCalliHelper unmanaged target Parameter + #define REG_PINVOKE_TARGET_PARAM REG_T2 + #define RBM_PINVOKE_TARGET_PARAM RBM_T2 + + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) + #define REG_SECRET_STUB_PARAM REG_T2 + #define RBM_SECRET_STUB_PARAM RBM_T2 + + // R2R indirect call. 
Use the same registers as VSD + #define REG_R2R_INDIRECT_PARAM REG_T6 + #define RBM_R2R_INDIRECT_PARAM RBM_T6 + + // JMP Indirect call register + #define REG_INDIRECT_CALL_TARGET_REG REG_T5 + + // Registers used by PInvoke frame setup + #define REG_PINVOKE_FRAME REG_T0 + #define RBM_PINVOKE_FRAME RBM_T0 + #define REG_PINVOKE_TCB REG_T1 + #define RBM_PINVOKE_TCB RBM_T1 + #define REG_PINVOKE_SCRATCH REG_T1 + #define RBM_PINVOKE_SCRATCH RBM_T1 + + // The following defines are useful for iterating a regNumber + #define REG_FIRST REG_R0 + #define REG_INT_FIRST REG_R0 + #define REG_INT_LAST REG_T6 + #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1) + #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1)) + #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) + + // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R16 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R16 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R17 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R17 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R16 + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R16 + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R17 + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R17 + + // The registers trashed by profiler enter/leave/tailcall hook + #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH + + // Which register are int and long values returned in ? + #define REG_INTRET REG_A0 + #define RBM_INTRET RBM_A0 + #define REG_LNGRET REG_A0 + #define RBM_LNGRET RBM_A0 + // second return register for 16-byte structs + #define REG_INTRET_1 REG_A1 + #define RBM_INTRET_1 RBM_A1 + + #define REG_FLOATRET REG_F10 + #define RBM_FLOATRET RBM_F10 + #define RBM_DOUBLERET RBM_F10 + #define REG_FLOATRET_1 REG_F11 + #define RBM_FLOATRET_1 RBM_F11 + #define RBM_DOUBLERET_1 RBM_F11 + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_A0 | RBM_A1 | RBM_A2 | RBM_A3 | RBM_A4 | RBM_A5 | RBM_A6 | RBM_A7 | RBM_T3)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + + #define REG_FPBASE REG_FP + #define RBM_FPBASE RBM_FP + #define STR_FPBASE "fp" + #define REG_SPBASE REG_SP + #define RBM_SPBASE RBM_SP // reuse the RBM for REG_ZR + #define STR_SPBASE "sp" + + #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address + + #define MAX_REG_ARG 8 + #define MAX_FLOAT_REG_ARG 8 + + #define REG_ARG_FIRST REG_A0 + #define REG_ARG_LAST REG_A7 + #define REG_ARG_FP_FIRST REG_F10 + #define REG_ARG_FP_LAST REG_F17 + #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots + + #define REG_ARG_0 REG_A0 + #define REG_ARG_1 REG_A1 + #define REG_ARG_2 REG_A2 + #define REG_ARG_3 REG_A3 + #define REG_ARG_4 REG_A4 + #define REG_ARG_5 REG_A5 + #define REG_ARG_6 REG_A6 + #define REG_ARG_7 REG_A7 + + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + + #define RBM_ARG_0 RBM_A0 + #define RBM_ARG_1 RBM_A1 + #define RBM_ARG_2 RBM_A2 + #define RBM_ARG_3 RBM_A3 + #define RBM_ARG_4 RBM_A4 + #define RBM_ARG_5 RBM_A5 + #define RBM_ARG_6 RBM_A6 + #define RBM_ARG_7 RBM_A7 + + #define REG_FLTARG_0 REG_F10 + #define REG_FLTARG_1 REG_F11 + #define REG_FLTARG_2 REG_F12 + #define REG_FLTARG_3 REG_F13 + #define REG_FLTARG_4 REG_F14 + #define REG_FLTARG_5 REG_F15 + #define REG_FLTARG_6 REG_F16 + #define REG_FLTARG_7 REG_F17 + + #define RBM_FLTARG_0 RBM_F10 + #define RBM_FLTARG_1 RBM_F11 + #define RBM_FLTARG_2 RBM_F12 + #define RBM_FLTARG_3 RBM_F13 + #define RBM_FLTARG_4 RBM_F14 + #define RBM_FLTARG_5 RBM_F15 + #define RBM_FLTARG_6 RBM_F16 + #define RBM_FLTARG_7 RBM_F17 + + #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7) + #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7) + + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + + #define B_DIST_SMALL_MAX_NEG (-131072) + #define B_DIST_SMALL_MAX_POS (+131071) + + #define OFFSET_DIST_SMALL_MAX_NEG (-2048) + #define OFFSET_DIST_SMALL_MAX_POS (+2047) + + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0 + +// clang-format on diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 12114916f5362c..27047d50a19bee 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -465,6 +465,10 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) // See unwindLoongarch64.cpp +#elif defined(TARGET_RISCV64) + +// See unwindRiscv64.cpp + #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index 46485e0eb11c6a..cd2c457ec57a0d 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Windows no longer imposes a maximum prolog size. 
However, we still have an // assert here just to inform us if we increase the size of the prolog @@ -42,7 +42,16 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 200; #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) #define UW_MAX_CODE_WORDS_COUNT 31 #define UW_MAX_EPILOG_START_INDEX 0x3FFU -#endif // TARGET_LOONGARCH64 +#elif defined(TARGET_RISCV64) // TODO RISCV64 +const unsigned MAX_PROLOG_SIZE_BYTES = 200; +const unsigned MAX_EPILOG_SIZE_BYTES = 200; +#define UWC_END 0xE4 // "end" unwind code +#define UWC_END_C 0xE5 // "end_c" unwind code +#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) +#define UW_MAX_CODE_WORDS_COUNT 31 +#define UW_MAX_EPILOG_START_INDEX 0x3FFU + +#endif // TARGET_RISCV64 #define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field // of the .pdata record @@ -111,9 +120,9 @@ class UnwindCodesBase { #if defined(TARGET_ARM) return b >= 0xFD; -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 } #ifdef DEBUG @@ -409,10 +418,12 @@ class UnwindEpilogCodes : public UnwindBase, public UnwindCodesBase uecFinalized = true; // With the "end" code in place, now we're done +#ifndef TARGET_RISCV64 // TODO COMMENTED OUT BECAUSE s_UnwindSize is not set #ifdef DEBUG unsigned codeSize = GetCodeSizeFromUnwindCodes(false); assert(codeSize <= MAX_EPILOG_SIZE_BYTES); #endif // DEBUG +#endif // !TARGET_RISCV64 } UnwindEpilogCodes() @@ -795,7 +806,7 @@ class UnwindInfo : public UnwindBase // Given the first byte of the unwind code, check that its opsize matches // the last instruction added in the emitter. void CheckOpsize(BYTE b1); -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void CheckOpsize(BYTE b1) { } // nothing to do; all instructions are 4 bytes @@ -846,4 +857,4 @@ void DumpUnwindInfo(Compiler* comp, #endif // DEBUG -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 diff --git a/src/coreclr/jit/unwindriscv64.cpp b/src/coreclr/jit/unwindriscv64.cpp new file mode 100644 index 00000000000000..d8f91985f67bc5 --- /dev/null +++ b/src/coreclr/jit/unwindriscv64.cpp @@ -0,0 +1,1875 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX UnwindInfo XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_RISCV64) + +#if defined(FEATURE_CFI_SUPPORT) +short Compiler::mapRegNumToDwarfReg(regNumber reg) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + return 0; +} +#endif // FEATURE_CFI_SUPPORT + +void Compiler::unwindPush(regNumber reg) +{ + unreached(); // use one of the unwindSaveReg* functions instead. 
+}
+
+void Compiler::unwindAllocStack(unsigned size)
+{
+#if defined(FEATURE_CFI_SUPPORT)
+    if (generateCFIUnwindCodes())
+    {
+        if (compGeneratingProlog)
+        {
+            unwindAllocStackCFI(size);
+        }
+
+        return;
+    }
+#endif // FEATURE_CFI_SUPPORT
+
+    UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+    assert(size % 16 == 0);
+    unsigned x = size / 16;
+
+    if (x <= 0x1F)
+    {
+        // alloc_s: 000xxxxx: allocate small stack with size < 512 (2^5 * 16)
+
+        pu->AddCode((BYTE)x);
+    }
+    else if (x <= 0x7F)
+    {
+        // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16)
+
+        pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x);
+    }
+    else
+    {
+        // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16)
+        //
+        // For large stack size, the most significant bits
+        // are stored first (and next to the opCode) per the unwind spec.
+
+        pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x);
+    }
+}
+
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+#if defined(FEATURE_CFI_SUPPORT)
+    if (generateCFIUnwindCodes())
+    {
+        if (compGeneratingProlog)
+        {
+            unwindSetFrameRegCFI(reg, offset);
+        }
+
+        return;
+    }
+#endif // FEATURE_CFI_SUPPORT
+
+    UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+    if (offset == 0)
+    {
+        assert(reg == REG_FP);
+
+        // set_fp: 11100001 : set up fp : with : move fp, sp
+        pu->AddCode(0xE1);
+    }
+    else
+    {
+        // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi fp, sp, #x * 8
+
+        assert(reg == REG_FP);
+        assert((offset % 8) == 0);
+
+        unsigned x = offset / 8;
+        assert(x <= 0x1FF);
+
+        pu->AddCode(0xE2, (BYTE)(x >> 8), (BYTE)x);
+    }
+}
+
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+    unwindSaveReg(reg, (int)offset);
+}
+
+void Compiler::unwindNop()
+{
+    UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+#ifdef DEBUG
+    if (verbose)
+    {
+        printf("unwindNop: adding NOP\n");
+    }
+#endif
+
+    INDEBUG(pu->uwiAddingNOP = true);
+
+    // nop: 11100011: no unwind operation is required.
+    pu->AddCode(0xE3);
+
+    INDEBUG(pu->uwiAddingNOP = false);
+}
+
+void Compiler::unwindSaveReg(regNumber reg, int offset)
+{
+
+    // sd reg, sp, offset
+
+    // offset for store in prolog must be positive and a multiple of 8.
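+    // For example (illustrative): saving S1 at [sp + 16] is encoded below as the save_reg
+    // sequence D0 08 02 (X = S1 - RA = 8, Z = 16 / 8 = 2).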
+ assert(0 <= offset && offset <= 2047); + assert((offset % 8) == 0); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset); + } + + return; + } +#endif // FEATURE_CFI_SUPPORT + int z = offset / 8; + // assert(0 <= z && z <= 0xFF); + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (emitter::isGeneralRegister(reg)) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(reg == REG_RA || (REG_FP <= reg && reg <= REG_S11)); // first legal register: RA, last legal register: S11 + + BYTE x = (BYTE)(reg - REG_RA); + assert(0 <= x && x <= 0x1B); + + pu->AddCode(0xD0, (BYTE)x, (BYTE)z); + } + else + { + // save_freg: 1101110x | xxxxzzzz | zzzzzzzz : save reg f(8 + #X) at [sp + #Z * 8], offset <= 2047 + assert(REG_F8 == reg || REG_F9 == reg || // first legal register: F8 + (REG_F18 <= reg && reg <= REG_F27)); // last legal register: F27 + + BYTE x = (BYTE)(reg - REG_F8); + assert(0 <= x && x <= 0x13); + + pu->AddCode(0xDC | (BYTE)(x >> 3), (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); // TODO NEED TO CHECK LATER + } +} + +void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) +{ + assert(!"unused on RISCV64 yet"); +} + +void Compiler::unwindReturn(regNumber reg) +{ + // Nothing to do; we will always have at least one trailing "end" opcode in our padding. +} + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Debug helpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes + +unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + return 0; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Support Classes XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindCodesBase +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef DEBUG + +// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes. +unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog) +{ + BYTE* pCodesStart = GetCodes(); + BYTE* pCodes = pCodesStart; + unsigned size = 0; + for (;;) + { + BYTE b1 = *pCodes; + if (IsEndCode(b1)) + { + break; // We hit an "end" code; we're done + } + size += 4; // All codes represent 4 byte instructions. 
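+        // Advance past this unwind code; a single code is 1 to 4 bytes long depending on its opcode.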
+ pCodes += GetUnwindSizeFromUnwindHeader(b1); + assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed + } + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Debug dumpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// start is 0-based index from LSB, length is number of bits +DWORD ExtractBits(DWORD dw, DWORD start, DWORD length) +{ + return (dw >> start) & ((1 << length) - 1); +} + +// Dump the unwind data. +// Arguments: +// isHotCode: true if this unwind data is for the hot section +// startOffset: byte offset of the code start that this unwind data represents +// endOffset: byte offset of the code end that this unwind data represents +// pHeader: pointer to the unwind data blob +// unwindBlockSize: size in bytes of the unwind data blob + +void DumpUnwindInfo(Compiler* comp, + bool isHotCode, + UNATIVE_OFFSET startOffset, + UNATIVE_OFFSET endOffset, + const BYTE* const pHeader, + ULONG unwindBlockSize) +{ + printf("Unwind Info%s:\n", isHotCode ? "" : " COLD"); + + // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end + // to provide padding, and round down to get a multiple of 4 bytes in size. + DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader; + DWORD dw; + + dw = *pdw++; + + DWORD codeWords = ExtractBits(dw, 27, 5); + DWORD epilogCount = ExtractBits(dw, 22, 5); + DWORD EBit = ExtractBits(dw, 21, 1); + DWORD XBit = ExtractBits(dw, 20, 1); + DWORD Vers = ExtractBits(dw, 18, 2); + DWORD functionLength = ExtractBits(dw, 0, 18); + + printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset)); + printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset)); + printf(" Code Words : %u\n", codeWords); + printf(" Epilog Count : %u\n", epilogCount); + printf(" E bit : %u\n", EBit); + printf(" X bit : %u\n", XBit); + printf(" Vers : %u\n", Vers); + printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength, + functionLength * 4, functionLength * 4); + + assert(functionLength * 4 == endOffset - startOffset); + + if (codeWords == 0 && epilogCount == 0) + { + // We have an extension word specifying a larger number of Code Words or Epilog Counts + // than can be specified in the header word. 
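+        // The extension word stores the extended epilog count in bits [0:15] and the extended
+        // code words count in bits [16:23]; the top four bits are reserved and must be zero.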
+ + dw = *pdw++; + + codeWords = ExtractBits(dw, 16, 8); + epilogCount = ExtractBits(dw, 0, 16); + assert((dw & 0xF0000000) == 0); // reserved field should be zero + + printf(" ---- Extension word ----\n"); + printf(" Extended Code Words : %u\n", codeWords); + printf(" Extended Epilog Count : %u\n", epilogCount); + } + + bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false + + if (EBit == 0) + { + // We have an array of epilog scopes + + printf(" ---- Epilog scopes ----\n"); + if (epilogCount == 0) + { + printf(" No epilogs\n"); + } + else + { + for (DWORD scope = 0; scope < epilogCount; scope++) + { + dw = *pdw++; + + DWORD epilogStartOffset = ExtractBits(dw, 0, 18); + DWORD res = ExtractBits(dw, 18, 4); + DWORD epilogStartIndex = ExtractBits(dw, 22, 10); + + // Note that epilogStartOffset for a funclet is the offset from the beginning + // of the current funclet, not the offset from the beginning of the main function. + // To help find it when looking through JitDump output, also show the offset from + // the beginning of the main function. + DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset; + + assert(res == 0); + + printf(" ---- Scope %d\n", scope); + printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main " + "function begin = %u (0x%06x)\n", + comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset), + comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin)); + printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex); + + epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes + } + } + } + else + { + printf(" --- One epilog, unwind codes at %u\n", epilogCount); + assert(epilogCount < ArrLen(epilogStartAt)); + epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset + } + + // Dump the unwind codes + + printf(" ---- Unwind codes ----\n"); + + DWORD countOfUnwindCodes = codeWords * 4; + PBYTE pUnwindCode = (PBYTE)pdw; + BYTE b1, b2, b3, b4; + DWORD x, z; + for (DWORD i = 0; i < countOfUnwindCodes; i++) + { + // Does this byte start an epilog sequence? If so, note that fact. 
+ if (epilogStartAt[i]) + { + printf(" ---- Epilog start at index %u ----\n", i); + } + + b1 = *pUnwindCode++; + + if ((b1 & 0xE0) == 0) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review:should say size < 512 + x = b1 & 0x1F; + printf(" %02X alloc_s #%u (0x%02X); addi sp, sp, -%u (0x%03X)\n", b1, x, x, x * 16, x * 16); + } +#if 0 + else if ((b1 & 0xE0) == 0x20) + { + // save_s0s1_x: 001zzzzz: save pair at [sp-#Z*8]!, pre-indexed offset >= -248 + z = b1 & 0x1F; + printf(" %02X save_s0s1_x #%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, z, z, + getRegName(REG_S0), getRegName(REG_S1), z * 8); + } + else if ((b1 & 0xF0) == 0x40) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra #%u (0x%03X); Two sd %s, %s, [sp, #%u]\n", b1, b2, z, z, getRegName(REG_FP), + getRegName(REG_RA), z * 8); + } + else if ((b1 & 0xF0) == 0x80) + { + // save_fpra_x: 1000zzzz | zzzzzzzz: save pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra_x #%u (0x%03X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, z, z, + getRegName(REG_FP), getRegName(REG_RA), (z + 1) * 8); + } +#endif + else if ((b1 & 0xF8) == 0xC0) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2; + + printf(" %02X %02X alloc_m #%u (0x%03X); addi sp, sp, -%u (0x%04X)\n", b1, b2, x, x, x * 16, + x * 16); + } + else if (b1 == 0xD0) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)b2; + z = (DWORD)b3; + + printf(" %02X %02X %02X save_reg X#%u Z#%u (0x%02X); sd %s, sp, %u\n", b1, b2, b3, x, z, z, + getRegName(REG_RA + x), z * 8); + } +#if 0 + else if (b1 == 0xC8) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), z * 8); + } + else if (b1 == 0xCC) + { + // save_regp_x: 11001100 | 0xxxzzzz | zzzzzzzz: save pair s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i+= 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp_x X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), (z + 1) * 8); + } + else if ((b1 & 0xFE) == 0xD4) + { + // save_reg_x: 1101010x | xxxzzzzz: save reg s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); sd %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_S0 + x), (z + 1) * 
8); + } + else if (b1 == 0xD6) + { + // save_rapair: 11010110 | 0xxxzzzz | zzzzzzzz: save pair at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_lrpair X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_RA), z * 8); + } + else if (b1 == 0xD8) + { + // save_fregp: 11011000 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X) at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), z * 8); + } + else if (b1 == 0xDA) + { + // save_fregp_x: 11011010 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp_x X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xDC) + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_freg X#%u Z#%u (0x%02X); fsd %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x), z * 8); + } +#if 0 + else if (b1 == 0xDE) + { + // save_freg_x: 11011110 | xxxzzzzz : save reg f(24 + #X) at [sp - (#Z + 1) * 8]!, pre - indexed offset >= + // -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); sdc1 %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_F24 + x, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xE0) + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + assert(i + 3 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + b4 = *pUnwindCode++; + i += 3; + + x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4; + + printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); addi sp, sp, -%u (%06X)\n", b1, b2, b3, b4, x, x, + x * 16, x * 16); + } + else if (b1 == 0xE1) + { + // set_fp: 11100001 : set up $29 : with : move fp, sp + + printf(" %02X set_fp; move %s, sp\n", b1, getRegName(REG_FP)); + } + else if (b1 == 0xE2) + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + assert(i + 2 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X add_fp %u (0x%02X); addi %s, sp, #%u\n", b1, b2, b3, x, x, + getRegName(REG_FP), x * 8); + } + else if (b1 == 0xE3) + { + // nop: 11100011: no unwind operation is required. 
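+            // These codes are emitted by unwindNop/unwindPadding for prolog instructions that
+            // have no effect on unwinding.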
+ + printf(" %02X nop\n", b1); + } + else if (b1 == 0xE4) + { + // end: 11100100 : end of unwind code + + printf(" %02X end\n", b1); + } + else if (b1 == 0xE5) + { + // end_c: 11100101 : end of unwind code in current chained scope. + + printf(" %02X end_c\n", b1); + } + else if (b1 == 0xE6) + { + // save_next: 11100110 : save next non - volatile Int or FP register pair. + + printf(" %02X save_next\n", b1); + } + else + { + printf("===========[riscv64] Unknown / reserved unwind code: %02X\n", b1); + // Unknown / reserved unwind code + assert(!"Internal error decoding unwind codes"); + } + } + + pdw += codeWords; + assert((PBYTE)pdw == pUnwindCode); + assert((PBYTE)pdw == pHeader + unwindBlockSize); + + assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA + + printf("\n"); +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind APIs XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +void Compiler::unwindBegProlog() +{ + assert(compGeneratingProlog); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + unwindBegPrologCFI(); + return; + } +#endif // FEATURE_CFI_SUPPORT + + FuncInfoDsc* func = funCurrentFunc(); + + // There is only one prolog for a function/funclet, and it comes first. So now is + // a good time to initialize all the unwind data structures. + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, true, &startLoc, &endLoc); + + func->uwi.InitUnwindInfo(this, startLoc, endLoc); + func->uwi.CaptureLocation(); + + func->uwiCold = NULL; // No cold data yet +} + +void Compiler::unwindEndProlog() +{ + assert(compGeneratingProlog); +} + +void Compiler::unwindBegEpilog() +{ + assert(compGeneratingEpilog); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // FEATURE_CFI_SUPPORT + + funCurrentFunc()->uwi.AddEpilog(); +} + +void Compiler::unwindEndEpilog() +{ + assert(compGeneratingEpilog); +} + +// The instructions between the last captured "current state" and the current instruction +// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes +// for them. +void Compiler::unwindPadding() +{ +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // FEATURE_CFI_SUPPORT + + UnwindInfo* pu = &funCurrentFunc()->uwi; + GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); +} + +// Ask the VM to reserve space for the unwind information for the function and +// all its funclets. +void Compiler::unwindReserve() +{ + assert(!compGeneratingProlog); + assert(!compGeneratingEpilog); + + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindReserveFunc(funGetFunc(funcIdx)); + } +} + +void Compiler::unwindReserveFunc(FuncInfoDsc* func) +{ + BOOL isFunclet = (func->funKind == FUNC_ROOT) ? 
FALSE : TRUE; + bool funcHasColdSection = false; + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + DWORD unwindCodeBytes = 0; + if (fgFirstColdBlock != nullptr) + { + eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes); + } + unwindCodeBytes = (DWORD)(func->cfiCodes->size() * sizeof(CFI_CODE)); + eeReserveUnwindInfo(isFunclet, false /*isColdCode*/, unwindCodeBytes); + + return; + } +#endif // FEATURE_CFI_SUPPORT + + // If there is cold code, split the unwind data between the hot section and the + // cold section. This needs to be done before we split into fragments, as each + // of the hot and cold sections can have multiple fragments. + + if (fgFirstColdBlock != NULL) + { + assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, false, &startLoc, &endLoc); + + func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo(); + func->uwiCold->InitUnwindInfo(this, startLoc, endLoc); + func->uwiCold->HotColdSplitCodes(&func->uwi); + + funcHasColdSection = true; + } + + // First we need to split the function or funclet into fragments that are no larger + // than 512K, so the fragment size will fit in the unwind data "Function Length" field. + // The LOONGARCH Exception Data specification "Function Fragments" section describes this. + func->uwi.Split(); + + func->uwi.Reserve(isFunclet, true); + + // After the hot section, split and reserve the cold section + + if (funcHasColdSection) + { + assert(func->uwiCold != NULL); + + func->uwiCold->Split(); + func->uwiCold->Reserve(isFunclet, false); + } +} + +// unwindEmit: Report all the unwind information to the VM. +// Arguments: +// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code +// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code. + +void Compiler::unwindEmit(void* pHotCode, void* pColdCode) +{ + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode); + } +} + +void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode) +{ + // Verify that the JIT enum is in sync with the JIT-EE interface enum + static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT); + static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); + static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); + +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + unwindEmitFuncCFI(func, pHotCode, pColdCode); + return; + } +#endif // FEATURE_CFI_SUPPORT + + func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); + + if (func->uwiCold != NULL) + { + func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindPrologCodes +// +/////////////////////////////////////////////////////////////////////////////// + +// We're going to use the prolog codes memory to store the final unwind data. +// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then +// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes. +// Set the header pointer for future use, adding the header bytes (this pointer +// is updated when a header byte is added), and remember the index that points +// to the beginning of the header. 
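+ //
+ // For illustration only, assuming example sizes headerBytes = 4, prologBytes = 6, epilogBytes = 4,
+ // the buffer ends up laid out as:
+ //
+ //   upcMem: [ header (4) | prolog codes (6) | non-matching epilog codes (4) | 3 'end' padding bytes ]
+ //           ^upcUnwindBlockSlot
+ //                         ^upcCodeSlot (after the prolog codes are moved)
+ //                                           ^upcEpilogSlot
+ //
+ // The header is then filled in front-to-back via AddHeaderWord and the non-matching epilog codes
+ // are appended via AppendEpilog.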
+ +void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes) +{ +#if 0 // TODO COMMENTED OUT BECAUSE s_UnwindSize is not set +#ifdef DEBUG + // We're done adding codes. Check that we didn't accidentally create a bigger prolog. + unsigned codeSize = GetCodeSizeFromUnwindCodes(true); + assert(codeSize <= MAX_PROLOG_SIZE_BYTES); +#endif // DEBUG +#endif + + int prologBytes = Size(); + + EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment + + upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header + + assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3); + + upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing + assert(upcHeaderSlot >= -1); + + if (epilogBytes > 0) + { + // The prolog codes that are already at the end of the array need to get moved to the middle, + // with space for the non-matching epilog codes to follow. + + memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), + &upcMem[upcCodeSlot], prologBytes); + + // Note that the three UWC_END padding bytes still exist at the end of the array. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes. + memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes); +#endif // DEBUG + + upcEpilogSlot = + upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill + + // Update upcCodeSlot to point at the new beginning of the prolog codes + upcCodeSlot = upcUnwindBlockSlot + headerBytes; + } +} + +// Add a header word. Header words are added starting at the beginning, in order: first to last. +// This is in contrast to the prolog unwind codes, which are added in reverse order. +void UnwindPrologCodes::AddHeaderWord(DWORD d) +{ + assert(-1 <= upcHeaderSlot); + assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there! + + // Store it byte-by-byte in little-endian format. We've already ensured there is enough space + // in SetFinalSize(). + upcMem[++upcHeaderSlot] = (BYTE)d; + upcMem[++upcHeaderSlot] = (BYTE)(d >> 8); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 16); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 24); +} + +// AppendEpilog: copy the epilog bytes to the next epilog bytes slot +void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi) +{ + assert(upcEpilogSlot != -1); + + int epiSize = pEpi->Size(); + memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(), + epiSize); // -3 to avoid writing to the alignment padding + assert(pEpi->GetStartIndex() == + upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it. + + upcEpilogSlot += epiSize; + assert(upcEpilogSlot <= upcMemSize - 3); +} + +// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes +void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize) +{ + assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data! + + *ppUnwindBlock = &upcMem[upcUnwindBlockSlot]; + + // We put 4 'end' codes at the end for padding, so we can ensure we have an + // unwind block that is a multiple of 4 bytes in size. 
Subtract off three 'end' + // codes (leave one), and then align the size up to a multiple of 4. + *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD)); +} + +int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) +{ + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = 0; // Size() - pEpi->Size(); + + BYTE* pProlog = GetCodes(); + BYTE* pEpilog = pEpi->GetCodes(); + + // First check set_fp. + if (0 < pEpi->Size()) + { + if (*pProlog == 0xE1) + { + pProlog++; + if (*pEpilog == 0xE1) + { + pEpilog++; + } + else + { + matchIndex = 1; + } + } + else if (*pProlog == 0xE2) + { + pProlog += 3; + if (*pEpilog == 0xE1) + { + pEpilog += 3; + } + else + { + matchIndex = 3; + } + } + } + + if (0 == memcmp(pProlog, pEpilog, pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +// Copy the prolog codes from another prolog. The only time this is legal is +// if we are at the initial state and no prolog codes have been added. +// This is used to create the 'phantom' prolog for non-first fragments. + +void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +void UnwindPrologCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > upcMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! + int newSize; + for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem, + upcMemSize); // copy the existing data to the end +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(upcMem, 0xFF, upcMemSize); +#endif // DEBUG + upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + upcCodeSlot += newSize - upcMemSize; + upcMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindPrologCodes::Dump(int indent) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogCodes +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindEpilogCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > uecMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
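+ // For illustration only: with uecMemSize == 16 and requiredSize == 40, the loop below doubles
+ // 16 -> 32 -> 64, so newSize becomes 64 -- the smallest doubling of the old size that satisfies
+ // the request.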
+ int newSize; + for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize); +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(uecMem, 0xFF, uecMemSize); +#endif // DEBUG + uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + // uecCodeSlot stays the same + uecMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindEpilogCodes::Dump(int indent) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogInfo +// +/////////////////////////////////////////////////////////////////////////////// + +// Do the current unwind codes match those of the argument epilog? +// If they don't match, return -1. If they do, return the offset into +// our codes at which the argument codes match. Note that this means that +// the argument codes can match a subset of our codes. The subset needs to be at +// the end, for the "end" code to match. +// +// Note that if we wanted to handle 0xFD and 0xFE codes, by converting +// an existing 0xFF code to one of those, we might do that here. + +int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi) +{ + if (Matches()) + { + // We are already matched to someone else, and won't provide codes to the final layout + return -1; + } + + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = Size() - pEpi->Size(); + + if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +void UnwindEpilogInfo::CaptureEmitLocation() +{ + noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog + epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation(); + epiEmitLocation->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindEpilogInfo::FinalizeOffset() +{ + epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->GetEmitter()); +} + +#ifdef DEBUG +void UnwindEpilogInfo::Dump(int indent) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindFragmentInfo +// +/////////////////////////////////////////////////////////////////////////////// + +UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog) + : UnwindBase(comp) + , ufiNext(NULL) + , ufiEmitLoc(emitLoc) + , ufiHasPhantomProlog(hasPhantomProlog) + , ufiPrologCodes(comp) + , ufiEpilogFirst(comp) + , ufiEpilogList(NULL) + , ufiEpilogLast(NULL) + , ufiCurCodes(&ufiPrologCodes) + , ufiSize(0) + , ufiStartOffset(UFI_ILLEGAL_OFFSET) +{ +#ifdef DEBUG + ufiNum = 1; + ufiInProlog = true; + ufiInitialized = UFI_INITIALIZED_PATTERN; +#endif // DEBUG +} + +void UnwindFragmentInfo::FinalizeOffset() +{ + if (ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. 
+ ufiStartOffset = 0; + } + else + { + ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); + } +} + +void UnwindFragmentInfo::AddEpilog() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (ufiInProlog) + { + assert(ufiEpilogList == NULL); + ufiInProlog = false; + } + else + { + assert(ufiEpilogList != NULL); + } +#endif // DEBUG + + // Either allocate a new epilog object, or, for the first one, use the + // preallocated one that is a member of the UnwindFragmentInfo class. + + UnwindEpilogInfo* newepi; + + if (ufiEpilogList == NULL) + { + // Use the epilog that's in the class already. Be sure to initialize it! + newepi = ufiEpilogList = &ufiEpilogFirst; + } + else + { + newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp); + } + + // Put the new epilog at the end of the epilog list + + if (ufiEpilogLast != NULL) + { + ufiEpilogLast->epiNext = newepi; + } + + ufiEpilogLast = newepi; + + // What is the starting code offset of the epilog? Store an emitter location + // so we can ask the emitter later, after codegen. + + newepi->CaptureEmitLocation(); + + // Put subsequent unwind codes in this new epilog + + ufiCurCodes = &newepi->epiCodes; +} + +// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will +// become 'phantom' prolog codes in this fragment. Note that this fragment should +// not have any prolog codes currently; it is at the initial state. + +void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent +// epilogs that start at or after the location represented by 'emitLoc' are removed +// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have +// any epilog codes currently; it is at the initial state. + +void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +// Is this epilog at the end of an unwind fragment? Ask the emitter. +// Note that we need to know this before all code offsets are finalized, +// so we can determine whether we can omit an epilog scope word for a +// single matching epilog. + +bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi) +{ + return uwiComp->GetEmitter()->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc); +} + +// Merge the unwind codes as much as possible. +// This function is called before all offsets are final. +// Also, compute the size of the final unwind block. Store this +// and some other data for later, when we actually emit the +// unwind block. + +void UnwindFragmentInfo::MergeCodes() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + + unsigned epilogCount = 0; + unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the + // prolog codes + unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes + UnwindEpilogInfo* pEpi; + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++epilogCount; + + pEpi->FinalizeCodes(); + + // Does this epilog match the prolog? + // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal + // prolog and epilog codes. 
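+ // For illustration only (hypothetical codes): if the prolog unwind codes are
+ // { set_fp, alloc_s #2, end } and this epilog's codes are { alloc_s #2, end }, then
+ // UnwindPrologCodes::Match skips the leading set_fp byte and returns 1, so the epilog reuses the
+ // prolog codes starting at index 1 and contributes no bytes of its own to the unwind block.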
+ + int matchIndex; + + matchIndex = ufiPrologCodes.Match(pEpi); + if (matchIndex != -1) + { + pEpi->SetMatches(); + pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index + } + else + { + // The epilog codes don't match the prolog codes. Do they match any of the epilogs + // we've seen so far? + + bool matched = false; + for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext) + { + matchIndex = pEpi2->Match(pEpi); + if (matchIndex != -1) + { + // Use the same epilog index as the one we matched, as it has already been set. + pEpi->SetMatches(); + pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's + // codes, in which case matchIndex > 0 + matched = true; + break; + } + } + + if (!matched) + { + pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location + epilogCodeBytes += pEpi->Size(); + epilogIndex += pEpi->Size(); + } + } + } + + DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes; + codeBytes = AlignUp(codeBytes, sizeof(DWORD)); + + DWORD codeWords = + codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block + + // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"? + + bool needExtendedCodeWordsEpilogCount = + (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT); + + // How many epilog scope words do we need? + + bool setEBit = false; // do we need to set the E bit? + unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs! + + if (epilogCount == 1) + { + assert(ufiEpilogList != NULL); + assert(ufiEpilogList->epiNext == NULL); + + if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog + !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList)) + { + epilogScopes = 0; // Don't need any epilog scope words + setEBit = true; + } + } + + DWORD headerBytes = (1 // Always need first header DWORD + + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code + // Words or Extended Epilog Count? + + epilogScopes // One DWORD per epilog scope, for EBit = 0 + ) * + sizeof(DWORD); // convert it to bytes + + DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, + // including end padding if necessary + + // Construct the final unwind information. + + // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs + // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match + // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the + // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog + // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we + // have that much memory, and then copy the prolog codes to the right place, appending the non-matching + // epilog codes and prepending the header. 
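+ // For illustration only (hypothetical sizes): 10 bytes of prolog codes plus one non-matching
+ // 6-byte epilog give codeBytes = AlignUp(16, 4) = 16 and codeWords = 4; with one epilog scope
+ // and no extension word, headerBytes = (1 + 0 + 1) * 4 = 8, so finalSize = 24 bytes.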
+ + ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes); + + if (epilogCodeBytes != 0) + { + // We need to copy the epilog code bytes to their final memory location + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + if (!pEpi->Matches()) + { + ufiPrologCodes.AppendEpilog(pEpi); + } + } + } + + // Save some data for later + ufiSize = finalSize; + ufiSetEBit = setEBit; + ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount; + ufiCodeWords = codeWords; + ufiEpilogScopes = epilogScopes; +} + +// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header. + +void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (0 && uwiComp->verbose) + { + printf("*************** Before fragment #%d finalize\n", ufiNum); + Dump(); + } +#endif + + // Compute the header + + noway_assert((functionLength & 3) == 0); + DWORD headerFunctionLength = functionLength / 4; + + DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. + DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. + DWORD headerEBit; + DWORD headerEpilogCount; // This depends on how we set headerEBit. + DWORD headerCodeWords; + DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. + DWORD headerExtendedCodeWords = 0; + + if (ufiSetEBit) + { + headerEBit = 1; + headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes! + headerCodeWords = ufiCodeWords; + } + else + { + headerEBit = 0; + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + headerEpilogCount = 0; + headerCodeWords = 0; + headerExtendedEpilogCount = ufiEpilogScopes; + headerExtendedCodeWords = ufiCodeWords; + } + else + { + headerEpilogCount = ufiEpilogScopes; + headerCodeWords = ufiCodeWords; + } + } + + // Start writing the header + + noway_assert(headerFunctionLength <= + 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error + + if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) | + (headerEpilogCount << 22) | (headerCodeWords << 27); + + ufiPrologCodes.AddHeaderWord(header); + + // Construct the second header word, if needed + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + noway_assert(headerEBit == 0); + noway_assert(headerEpilogCount == 0); + noway_assert(headerCodeWords == 0); + noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT)); + + if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16); + + ufiPrologCodes.AddHeaderWord(header2); + } + + // Construct the epilog scope words, if needed + + if (!ufiSetEBit) + { + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + // The epilog must strictly follow the prolog. The prolog is in the first fragment of + // the hot section. If this epilog is at the start of a fragment, it can't be the + // first fragment in the hot section. 
We actually don't know if we're processing + // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus, + // we just assert that the epilog starts within the fragment. + assert(pEpi->GetStartOffset() >= GetStartOffset()); + + // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment, + // NOT the offset from the beginning of the main function. + DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset(); + + noway_assert((headerEpilogStartOffset & 3) == 0); + headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits + // of the actual offset is always zero) + + DWORD headerEpilogStartIndex = pEpi->GetStartIndex(); + + if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) || + (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22); + + ufiPrologCodes.AddHeaderWord(epilogScopeWord); + } + } + + // The unwind code words are already here, following the header, so we're done! +} + +void UnwindFragmentInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH + + MergeCodes(); + + BOOL isColdCode = isHotCode ? FALSE : TRUE; + + ULONG unwindSize = Size(); + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("reserveUnwindInfo: fragment #%d:\n", ufiNum); + } +#endif + + uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize); +} + +// Allocate the unwind info for a fragment with the VM. +// Arguments: +// funKind: funclet kind +// pHotCode: hot section code buffer +// pColdCode: cold section code buffer +// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a +// function/funclet. +// isHotCode: are we allocating the unwind info for the hot code section? + +void UnwindFragmentInfo::Allocate( + CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode) +{ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + // We don't support hot/cold splitting with EH, so if there is cold code, this + // better not be a funclet! + // TODO-CQ: support funclets in cold code + + noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT); + + // Compute the final size, and start and end offsets of the fragment + + startOffset = GetStartOffset(); + + if (ufiNext == NULL) + { + // This is the last fragment, so the fragment extends to the end of the function/fragment. + assert(funcEndOffset != 0); + endOffset = funcEndOffset; + } + else + { + // The fragment length is all the code between the beginning of this fragment + // and the beginning of the next fragment. Note that all fragments have had their + // offsets computed before any fragment is allocated. 
+ endOffset = ufiNext->GetStartOffset(); + } + + assert(endOffset > startOffset); + codeSize = endOffset - startOffset; + + // Finalize the fragment unwind block to hand to the VM + + Finalize(codeSize); + + // Get the final unwind information and hand it to the VM + + ULONG unwindBlockSize; + BYTE* pUnwindBlock; + + GetFinalInfo(&pUnwindBlock, &unwindBlockSize); + +#ifdef DEBUG + if (uwiComp->opts.dspUnwind) + { + DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize); + } +#endif // DEBUG + + // Adjust for cold or hot code: + // 1. The VM doesn't want the cold code pointer unless this is cold code. + // 2. The startOffset and endOffset need to be from the base of the hot section for hot code + // and from the base of the cold section for cold code + + if (isHotCode) + { + assert(endOffset <= uwiComp->info.compTotalHotCodeSize); + pColdCode = NULL; + } + else + { + assert(startOffset >= uwiComp->info.compTotalHotCodeSize); + startOffset -= uwiComp->info.compTotalHotCodeSize; + endOffset -= uwiComp->info.compTotalHotCodeSize; + } + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("unwindEmit: fragment #%d:\n", ufiNum); + } +#endif // DEBUG + + uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock, + funKind); +} + +#ifdef DEBUG +void UnwindFragmentInfo::Dump(int indent) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindInfo +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc) +{ + uwiComp = comp; + + // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated. + // However, its constructor needs to be explicitly called, since the constructor for + // UnwindInfo is not called. + + new (&uwiFragmentFirst, jitstd::placement_t()) UnwindFragmentInfo(comp, startLoc, false); + + uwiFragmentLast = &uwiFragmentFirst; + + uwiEndLoc = endLoc; + + // Allocate an emitter location object. It is initialized to something + // invalid: it has a null 'ig' that needs to get set before it can be used. + // Note that when we create an UnwindInfo for the cold section, this never + // gets initialized with anything useful, since we never add unwind codes + // to the cold section; we simply distribute the existing (previously added) codes. + uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation(); + +#ifdef DEBUG + uwiInitialized = UWI_INITIALIZED_PATTERN; + uwiAddingNOP = false; +#endif // DEBUG +} + +// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi') +// and those that are in the cold section (move them to 'this'). There is exactly one fragment +// in each UnwindInfo; the fragments haven't been split for size, yet. + +void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +// Split the function or funclet into fragments that are no larger than 512K, +// so the fragment size will fit in the unwind data "Function Length" field. 
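+ // For illustration only (assuming the 512K limit above, i.e. 524288 bytes): a 1,200,000-byte
+ // section would be split into (1200000 + 524287) / 524288 = 3 fragments, and each fragment after
+ // the first gets a "phantom" prolog copied from the real one.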
+ +void UnwindInfo::Split() +{ + UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes + + maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES; + +#ifdef DEBUG + // Consider COMPlus_JitSplitFunctionSize + unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize(); + + if (splitFunctionSize != 0) + if (splitFunctionSize < maxFragmentSize) + maxFragmentSize = splitFunctionSize; +#endif // DEBUG + + // Now, there should be exactly one fragment. + + assert(uwiFragmentLast != NULL); + assert(uwiFragmentLast == &uwiFragmentFirst); + assert(uwiFragmentLast->ufiNext == NULL); + + // Find the code size of this function/funclet. + + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + if (uwiFragmentLast->ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + startOffset = 0; + } + else + { + startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + if (uwiEndLoc == NULL) + { + // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions + // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks. + // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late + // for us, since we need to decide how many fragments we need before the code memory is allocated + // (which is before instruction issuing). + UNATIVE_OFFSET estimatedTotalCodeSize = + uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize; + assert(estimatedTotalCodeSize != 0); + endOffset = estimatedTotalCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + assert(endOffset > startOffset); // there better be at least 1 byte of code + codeSize = endOffset - startOffset; + + // Now that we know the code size for this section (main function hot or cold, or funclet), + // figure out how many fragments we're going to need. + + UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up + assert(numberOfFragments > 0); + + if (numberOfFragments == 1) + { + // No need to split; we're done + return; + } + + // Now, we're going to commit to splitting the function into "numberOfFragments" fragments, + // for the purpose of unwind information. We need to do the actual splits so we can figure out + // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know + // the actual offsets of the splits since we haven't issued the instructions yet, so store + // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase, + // like we do for the function length and epilog offsets. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (uwiComp->verbose) + { + printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n", + numberOfFragments, codeSize, maxFragmentSize); + } +#endif // DEBUG + + // Call the emitter to do the split, and call us back for every split point it chooses. + uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, + EmitSplitCallback); + +#ifdef DEBUG + // Did the emitter split the function/funclet into as many fragments as we asked for? + // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not + // be fewer if we're splitting into 512K blocks! 
+ + unsigned fragCount = 0; + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++fragCount; + } + if (fragCount < numberOfFragments) + { + if (uwiComp->verbose) + { + printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount); + } + + // If this fires, then we split into fewer fragments than we asked for, and we are using + // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit + // this fragment into the unwind data! If you set COMPlus_JitSplitFunctionSize to something + // small, we might not be able to split into as many fragments as asked for, because we + // can't split prologs or epilogs. + assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES); + } +#endif // DEBUG +} + +/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +// Reserve space for the unwind info for all fragments + +void UnwindInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(isHotCode || !isFunclet); + + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Reserve(isFunclet, isHotCode); + } +} + +// Allocate and populate VM unwind info for all fragments + +void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + + UnwindFragmentInfo* pFrag; + + // First, finalize all the offsets (the location of the beginning of fragments, and epilogs), + // so a fragment can use the finalized offset of the subsequent fragment to determine its code size. + + UNATIVE_OFFSET endOffset; + + if (uwiEndLoc == NULL) + { + assert(uwiComp->info.compNativeCodeSize != 0); + endOffset = uwiComp->info.compNativeCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->FinalizeOffset(); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode); + } +} + +void UnwindInfo::AddEpilog() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + uwiFragmentLast->AddEpilog(); + CaptureLocation(); +} + +void UnwindInfo::CaptureLocation() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiCurLoc != NULL); + uwiCurLoc->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindInfo::AddFragment(emitLocation* emitLoc) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +#ifdef DEBUG + +void UnwindInfo::Dump(bool isHotCode, int indent) +{ + _ASSERTE(!"TODO RISCV64 NYI"); +} + +#endif // DEBUG + +#endif // TARGET_RISCV64 diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index bf5181be76a0a1..8b4dc62d439839 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -333,6 +333,15 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) inRegRange = true; sep = "-"; } +#elif defined(TARGET_RISCV64) + if ((REG_A0 <= regNum && REG_A7 >= regNum) || + REG_T0 == regNum || REG_T1 == regNum || + (REG_T2 <= regNum && REG_T6 >= regNum)) + { + regHead = regNum; + inRegRange = true; + sep = "-"; + } #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index c43bb47a36f8d4..c7c4fcfe969590 100644 
--- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -55,7 +55,7 @@ struct FloatTraits { #if defined(TARGET_XARCH) unsigned bits = 0xFFC00000u; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) unsigned bits = 0x7FC00000u; #else #error Unsupported or unset target architecture @@ -81,7 +81,7 @@ struct DoubleTraits { #if defined(TARGET_XARCH) unsigned long long bits = 0xFFF8000000000000ull; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) unsigned long long bits = 0x7FF8000000000000ull; #else #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 8dcb66c5f5669b..6146a5d070d183 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -186,6 +186,9 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t #elif defined (TARGET_LOONGARCH64) //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. +#elif defined (TARGET_RISCV64) + // TODO RISCV64 + #else #error Unsupported platform #endif diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 1b468e2c41d028..f8bad6a22d61e2 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -335,6 +335,8 @@ void ThreadStore::ResumeAllThreads(bool waitForGCEvent) } } // ResumeAllThreads +#ifndef DACCESS_COMPILE + void ThreadStore::InitiateThreadAbort(Thread* targetThread, Object * threadAbortException, bool doRudeAbort) { SuspendAllThreads(/* waitForGCEvent = */ false); @@ -407,6 +409,8 @@ COOP_PINVOKE_HELPER(void, RhpCancelThreadAbort, (void* thread)) GetThreadStore()->CancelThreadAbort((Thread*)thread); } +#endif // DACCESS_COMPILE + C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); #ifndef _MSC_VER diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 88dcd4e8d77f7a..2e407f2616335a 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -2100,10 +2100,10 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { #elif defined(HOST_RISCV64) -#error "TODO-RISCV64: review this when src/coreclr/pal/src/arch/riscv64/asmconstants.h is ported" +// #error "TODO-RISCV64: review this when src/coreclr/pal/src/arch/riscv64/asmconstants.h is ported" // Please refer to src/coreclr/pal/src/arch/riscv64/asmconstants.h -#define CONTEXT_RISCV64 0x04000000L +#define CONTEXT_RISCV64 0x01000000L #define CONTEXT_CONTROL (CONTEXT_RISCV64 | 0x1) #define CONTEXT_INTEGER (CONTEXT_RISCV64 | 0x2) @@ -2150,6 +2150,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // // Integer registers. 
// + DWORD64 R0; DWORD64 Ra; DWORD64 Sp; DWORD64 Gp; @@ -2157,7 +2158,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { DWORD64 T0; DWORD64 T1; DWORD64 T2; - DWORD64 S0; + DWORD64 Fp; DWORD64 S1; DWORD64 A0; DWORD64 A1; @@ -2197,20 +2198,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { typedef struct _KNONVOLATILE_CONTEXT_POINTERS { - PDWORD64 Ra; - PDWORD64 Tp; - PDWORD64 T0; - PDWORD64 T1; - PDWORD64 S0; PDWORD64 S1; - PDWORD64 A0; - PDWORD64 A1; - PDWORD64 A2; - PDWORD64 A3; - PDWORD64 A4; - PDWORD64 A5; - PDWORD64 A6; - PDWORD64 A7; PDWORD64 S2; PDWORD64 S3; PDWORD64 S4; @@ -2221,31 +2209,23 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { PDWORD64 S9; PDWORD64 S10; PDWORD64 S11; - PDWORD64 T3; - PDWORD64 T4; - PDWORD64 T5; - PDWORD64 T6; - - PDWORD64 FS0; - PDWORD64 FS1; - PDWORD64 FA0; - PDWORD64 FA1; - PDWORD64 FA2; - PDWORD64 FA3; - PDWORD64 FA4; - PDWORD64 FA5; - PDWORD64 FA6; - PDWORD64 FA7; - PDWORD64 FS2; - PDWORD64 FS3; - PDWORD64 FS4; - PDWORD64 FS5; - PDWORD64 FS6; - PDWORD64 FS7; - PDWORD64 FS8; - PDWORD64 FS9; - PDWORD64 FS10; - PDWORD64 FS11; + PDWORD64 Fp; + PDWORD64 Gp; + PDWORD64 Tp; + PDWORD64 Ra; + + PDWORD64 F8; + PDWORD64 F9; + PDWORD64 F18; + PDWORD64 F19; + PDWORD64 F20; + PDWORD64 F21; + PDWORD64 F22; + PDWORD64 F23; + PDWORD64 F24; + PDWORD64 F25; + PDWORD64 F26; + PDWORD64 F27; } KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; #elif defined(HOST_S390X) @@ -3713,7 +3693,7 @@ YieldProcessor() #elif defined(HOST_LOONGARCH64) __asm__ volatile( "dbar 0; \n"); #elif defined(HOST_RISCV64) - __asm__ __volatile__( "wfi"); + __asm__ __volatile__( "fence iorw, iorw"); // TODO #else return; #endif diff --git a/src/coreclr/pal/inc/rt/ntimage.h b/src/coreclr/pal/inc/rt/ntimage.h index 2cf95e40721ab2..fd927558aa8233 100644 --- a/src/coreclr/pal/inc/rt/ntimage.h +++ b/src/coreclr/pal/inc/rt/ntimage.h @@ -244,6 +244,7 @@ typedef struct _IMAGE_FILE_HEADER { #define IMAGE_FILE_MACHINE_ARM64 0xAA64 // ARM64 Little-Endian #define IMAGE_FILE_MACHINE_CEE 0xC0EE #define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. +#define IMAGE_FILE_MACHINE_RISCV64 0x5641 // RISCV64 // // Directory format. @@ -1020,6 +1021,12 @@ typedef IMAGE_RELOCATION UNALIGNED *PIMAGE_RELOCATION; #define IMAGE_REL_LOONGARCH64_PC 0x0003 #define IMAGE_REL_LOONGARCH64_JIR 0x0004 +// +// LOONGARCH64 relocation types +// +#define IMAGE_REL_RISCV64_PC 0x0003 +#define IMAGE_REL_RISCV64_JALR 0x0004 + // // CEF relocation types. // diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc index 683ae88415a907..58eb07a6771ca5 100644 --- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc @@ -1,42 +1,328 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-#error "TODO-RISCV64: review this; missing many macros for VM" - .macro NESTED_ENTRY Name, Section, Handler - LEAF_ENTRY \Name, \Section - .ifnc \Handler, NoHandler - .personality C_FUNC(\Handler) - .endif + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0x1c, C_FUNC(\Handler) // 0x1c == DW_EH_PE_pcrel | DW_EH_PE_sdata8 + .endif .endm .macro NESTED_END Name, Section - LEAF_END \Name, \Section + LEAF_END \Name, \Section .endm .macro PATCH_LABEL Name - .global C_FUNC(\Name) + .global C_FUNC(\Name) C_FUNC(\Name): .endm .macro LEAF_ENTRY Name, Section - .global C_FUNC(\Name) - .type \Name, %function + .global C_FUNC(\Name) + .type \Name, %function C_FUNC(\Name): - .cfi_startproc + .cfi_startproc .endm .macro LEAF_END Name, Section - .size \Name, .-\Name - .cfi_endproc + .size \Name, .-\Name + .cfi_endproc .endm .macro LEAF_END_MARKED Name, Section C_FUNC(\Name\()_End): - .global C_FUNC(\Name\()_End) - LEAF_END \Name, \Section + .global C_FUNC(\Name\()_End) + LEAF_END \Name, \Section + // make sure this symbol gets its own address + nop +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + lla \HelperReg, \Name +.endm + +.macro PROLOG_STACK_ALLOC Size + addi sp, sp, -\Size + //.cfi_adjust_cfa_offset \Size + .cfi_def_cfa sp,\Size +.endm + +.macro EPILOG_STACK_FREE Size + addi sp, sp, \Size + //.cfi_adjust_cfa_offset -\Size + .cfi_def_cfa sp,-\Size +.endm + +.macro EPILOG_STACK_RESTORE + ori sp, fp, 0 + .cfi_restore sp +.endm + +.macro PROLOG_SAVE_REG reg, ofs + sd \reg, \ofs(sp) + .cfi_rel_offset \reg, \ofs +.endm + +.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs, __def_cfa_save=0 + sd \reg1, \ofs(sp) + sd \reg2, (\ofs+8)(sp) + .cfi_rel_offset \reg1, \ofs + .cfi_rel_offset \reg1, \ofs + 8 + .if (\__def_cfa_save == 1) + addi fp, sp, 0 + .cfi_def_cfa_register fp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 + addi sp, sp, -\ssize + + sd \reg1, 0(sp) + sd \reg2, 8(sp) + + .cfi_adjust_cfa_offset -\ssize + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 + .if (\__def_cfa_save == 1) + addi fp, sp, 0 + .cfi_def_cfa_register fp + .endif +.endm + +.macro EPILOG_RESTORE_REG reg, ofs + ld \reg, (\ofs)(sp) + .cfi_restore \reg + .cfi_def_cfa_register sp +.endm + +.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs + ld \reg2, (\ofs+8)(sp) + ld \reg1, (\ofs)(sp) + .cfi_restore \reg2 + .cfi_restore \reg1 +.endm + +.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ssize + ld \reg2, 8(sp) + ld \reg1, 0(sp) + .cfi_restore \reg2 + .cfi_restore \reg1 + + addi sp, sp, \ssize + .cfi_def_cfa sp,-\ssize +.endm + +.macro EPILOG_RETURN + jalr x0, ra, 0 .endm .macro EMIT_BREAKPOINT - ebreak + ebreak +.endm + +.macro EPILOG_BRANCH Target + j \Target +.endm + +.macro EPILOG_BRANCH_REG reg + jalr x0, \reg, 0 +.endm + +//----------------------------------------------------------------------------- +// The Following sets of SAVE_*_REGISTERS expect the memory to be reserved and +// base address to be passed in $reg +// + +// Reserve 64 bytes of memory before calling SAVE_CALLEESAVED_REGISTERS +.macro SAVE_CALLEESAVED_REGISTERS reg, ofs + PROLOG_SAVE_REG_PAIR s1, s2, \ofs + 16 + PROLOG_SAVE_REG_PAIR s3, s4, \ofs + 32 + PROLOG_SAVE_REG_PAIR s5, s6, \ofs + 48 + PROLOG_SAVE_REG_PAIR s7, s8, \ofs + 64 + PROLOG_SAVE_REG_PAIR s9, s10, \ofs + 80 + PROLOG_SAVE_REG_PAIR s11, tp \ofs + 96 + PROLOG_SAVE_REG gp, \ofs + 112 +.endm + +// Reserve 64 bytes of memory before calling SAVE_ARGUMENT_REGISTERS +.macro SAVE_ARGUMENT_REGISTERS reg, ofs + sd a0, (\ofs)(\reg) + sd 
a1, (\ofs + 8)(\reg) + sd a2, (\ofs + 16)(\reg) + sd a3, (\ofs + 24)(\reg) + sd a4, (\ofs + 32)(\reg) + sd a5, (\ofs + 40)(\reg) + sd a6, (\ofs + 48)(\reg) + sd a7, (\ofs + 56)(\reg) +.endm + +// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS +.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs + fsd fa0, (\ofs)(\reg) + fsd fa1, (\ofs + 8)(\reg) + fsd fa2, (\ofs + 16)(\reg) + fsd fa3, (\ofs + 24)(\reg) + fsd fa4, (\ofs + 32)(\reg) + fsd fa5, (\ofs + 40)(\reg) + fsd fa6, (\ofs + 48)(\reg) + fsd fa7, (\ofs + 56)(\reg) +.endm + +// Reserve 64 bytes of memory before calling SAVE_FLOAT_CALLEESAVED_REGISTERS +.macro SAVE_FLOAT_CALLEESAVED_REGISTERS reg, ofs +// TODO RISCV NYI + sw ra, 0(zero) +.endm + +.macro RESTORE_CALLEESAVED_REGISTERS reg, ofs + EPILOG_RESTORE_REG gp \ofs + 112 + EPILOG_RESTORE_REG_PAIR s11, tp \ofs + 96 + EPILOG_RESTORE_REG_PAIR s9, s10, \ofs + 80 + EPILOG_RESTORE_REG_PAIR s7, s8, \ofs + 64 + EPILOG_RESTORE_REG_PAIR s5, s6, \ofs + 48 + EPILOG_RESTORE_REG_PAIR s3, s4, \ofs + 32 + EPILOG_RESTORE_REG_PAIR s1, s2, \ofs + 16 +.endm + +.macro RESTORE_ARGUMENT_REGISTERS reg, ofs + ld a0, (\ofs)(\reg) + ld a1, (\ofs + 8)(\reg) + ld a2, (\ofs + 16)(\reg) + ld a3, (\ofs + 24)(\reg) + ld a4, (\ofs + 32)(\reg) + ld a5, (\ofs + 40)(\reg) + ld a6, (\ofs + 48)(\reg) + ld a7, (\ofs + 56)(\reg) +.endm + +.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs + fld fa0, (\ofs)(\reg) + fld fa1, (\ofs + 8)(\reg) + fld fa2, (\ofs + 16)(\reg) + fld fa3, (\ofs + 24)(\reg) + fld fa4, (\ofs + 32)(\reg) + fld fa5, (\ofs + 40)(\reg) + fld fa6, (\ofs + 48)(\reg) + fld fa7, (\ofs + 56)(\reg) +.endm + +.macro RESTORE_FLOAT_CALLEESAVED_REGISTERS reg, ofs +// TODO RISCV NYI + sw ra, 0(zero) +.endm + +//----------------------------------------------------------------------------- +// Define the prolog for a TransitionBlock-based method. This macro should be called first in the method and +// comprises the entire prolog.The locals must be 8 byte aligned +// +// Save_argument_registers: +// GPR_a7 +// GPR_a6 +// GPR_a5 +// GPR_a4 +// GPR_a3 +// GPR_a2 +// GPR_a1 +// GPR_a0 +// +// General Registers: +// GPR_tp +// GPR_s8 +// GPR_s7 +// GPR_s6 +// GPR_s5 +// GPR_s4 +// GPR_s3 +// GPR_s2 +// GPR_s1 +// GPR_s0 +// GPR_ra +// GPR_fp +// +// Float Point: +// FPR_f27 / fs11 +// FPR_f26 / fs10 +// FPR_f25 / fs9 +// FPR_f24 / fs8 +// FPR_f23 / fs7 +// FPR_f22 / fs6 +// FPR_f21 / fs5 +// FPR_f20 / fs4 +// FPR_f19 / fs3 +// FPR_f18 / fs2 +// FPR_f9 / fs1 +// FPR_f8 / fs0 +// Extra: +// +.macro PROLOG_WITH_TRANSITION_BLOCK extraParameters = 0, extraLocals = 0, SaveFPRegs = 1 + __PWTB_SaveFPArgs = \SaveFPRegs + + __PWTB_FloatArgumentRegisters = \extraLocals + + .if ((__PWTB_FloatArgumentRegisters % 16) != 0) + __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 8 + .endif + + __PWTB_TransitionBlock = __PWTB_FloatArgumentRegisters + + .if (__PWTB_SaveFPArgs == 1) + __PWTB_TransitionBlock = __PWTB_TransitionBlock + SIZEOF__FloatArgumentRegisters + .endif + + + __PWTB_CalleeSavedRegisters = __PWTB_TransitionBlock + __PWTB_ArgumentRegisters = __PWTB_TransitionBlock + 120 + + // Including fp, ra, s1-s11, tp, gp, and (a0-a7)arguments. (1+1+11+1+1)*8 + 8*8. + __PWTB_StackAlloc = __PWTB_TransitionBlock + 120 + 64 + PROLOG_STACK_ALLOC __PWTB_StackAlloc + PROLOG_SAVE_REG_PAIR fp, ra, __PWTB_CalleeSavedRegisters, 1 + + // First, Spill argument registers. + SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + + // Then, Spill callee saved registers. sp=r3. 
+ SAVE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters + + // saving is f10-17. + .if (__PWTB_SaveFPArgs == 1) + SAVE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters + .endif + +.endm + +.macro EPILOG_WITH_TRANSITION_BLOCK_RETURN +// TODO RISCV NYI + sw ra, 0(zero) +.endm + + +//----------------------------------------------------------------------------- +// Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and ends by preparing for tail-calling. +// Since this is a tail call argument registers are restored. +// +.macro EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + .if (__PWTB_SaveFPArgs == 1) + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters + .endif + + RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters + + RESTORE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + + EPILOG_RESTORE_REG_PAIR fp, ra, __PWTB_CalleeSavedRegisters + + EPILOG_STACK_FREE __PWTB_StackAlloc +.endm + +// ------------------------------------------------------------------ +// Macro to generate Redirection Stubs +// +// $reason : reason for redirection +// Eg. GCThreadControl +// NOTE: If you edit this macro, make sure you update GetCONTEXTFromRedirectedStubStackFrame. +// This function is used by both the personality routine and the debugger to retrieve the original CONTEXT. +.macro GenerateRedirectedHandledJITCaseStub reason +// TODO RISCV NYI + sw ra, 0(zero) .endm diff --git a/src/coreclr/pal/prebuilt/inc/cordebug.h b/src/coreclr/pal/prebuilt/inc/cordebug.h index fa4b434eccf554..54bde6b445ea84 100644 --- a/src/coreclr/pal/prebuilt/inc/cordebug.h +++ b/src/coreclr/pal/prebuilt/inc/cordebug.h @@ -1476,7 +1476,8 @@ enum CorDebugPlatform CORDB_PLATFORM_POSIX_X86 = ( CORDB_PLATFORM_POSIX_AMD64 + 1 ) , CORDB_PLATFORM_POSIX_ARM = ( CORDB_PLATFORM_POSIX_X86 + 1 ) , CORDB_PLATFORM_POSIX_ARM64 = ( CORDB_PLATFORM_POSIX_ARM + 1 ) , - CORDB_PLATFORM_POSIX_LOONGARCH64 = ( CORDB_PLATFORM_POSIX_ARM64 + 1 ) + CORDB_PLATFORM_POSIX_LOONGARCH64 = ( CORDB_PLATFORM_POSIX_ARM64 + 1 ) , + CORDB_PLATFORM_POSIX_RISCV64 = ( CORDB_PLATFORM_POSIX_LOONGARCH64 + 1 ) } CorDebugPlatform; @@ -9256,7 +9257,71 @@ enum CorDebugRegister REGISTER_LOONGARCH64_F28 = ( REGISTER_LOONGARCH64_F27 + 1 ) , REGISTER_LOONGARCH64_F29 = ( REGISTER_LOONGARCH64_F28 + 1 ) , REGISTER_LOONGARCH64_F30 = ( REGISTER_LOONGARCH64_F29 + 1 ) , - REGISTER_LOONGARCH64_F31 = ( REGISTER_LOONGARCH64_F30 + 1 ) + REGISTER_LOONGARCH64_F31 = ( REGISTER_LOONGARCH64_F30 + 1 ), + REGISTER_RISCV64_PC = 0, + REGISTER_RISCV64_RA = ( REGISTER_RISCV64_PC + 1), + REGISTER_RISCV64_SP = ( REGISTER_RISCV64_RA + 1), + REGISTER_RISCV64_GP = ( REGISTER_RISCV64_SP + 1), + REGISTER_RISCV64_TP = ( REGISTER_RISCV64_GP + 1 ), + REGISTER_RISCV64_T0 = ( REGISTER_RISCV64_TP + 1 ), + REGISTER_RISCV64_T1 = ( REGISTER_RISCV64_T0 + 1 ), + REGISTER_RISCV64_T2 = ( REGISTER_RISCV64_T1 + 1 ), + REGISTER_RISCV64_FP = ( REGISTER_RISCV64_T2 + 1 ), + REGISTER_RISCV64_S1 = ( REGISTER_RISCV64_FP + 1 ), + REGISTER_RISCV64_A0 = ( REGISTER_RISCV64_S1 + 1 ), + REGISTER_RISCV64_A1 = ( REGISTER_RISCV64_A0 + 1 ), + REGISTER_RISCV64_A2 = ( REGISTER_RISCV64_A1 + 1 ), + REGISTER_RISCV64_A3 = ( REGISTER_RISCV64_A2 + 1 ), + REGISTER_RISCV64_A4 = ( REGISTER_RISCV64_A3 + 1 ), + REGISTER_RISCV64_A5 = ( REGISTER_RISCV64_A4 + 1 ), + REGISTER_RISCV64_A6 = ( REGISTER_RISCV64_A5 + 1 ), + REGISTER_RISCV64_A7 = ( REGISTER_RISCV64_A6 + 1 ), + REGISTER_RISCV64_S2 = ( REGISTER_RISCV64_A7 + 1 ), + REGISTER_RISCV64_S3 = ( REGISTER_RISCV64_S2 + 1 ), + REGISTER_RISCV64_S4 = ( REGISTER_RISCV64_S3 + 1 
), + REGISTER_RISCV64_S5 = ( REGISTER_RISCV64_S4 + 1 ), + REGISTER_RISCV64_S6 = ( REGISTER_RISCV64_S5 + 1 ), + REGISTER_RISCV64_S7 = ( REGISTER_RISCV64_S6 + 1 ), + REGISTER_RISCV64_S8 = ( REGISTER_RISCV64_S7 + 1 ), + REGISTER_RISCV64_S9 = ( REGISTER_RISCV64_S8 + 1 ), + REGISTER_RISCV64_S10 = ( REGISTER_RISCV64_S9 + 1 ), + REGISTER_RISCV64_S11 = ( REGISTER_RISCV64_S10 + 1 ), + REGISTER_RISCV64_T3 = ( REGISTER_RISCV64_S11 + 1 ), + REGISTER_RISCV64_T4 = ( REGISTER_RISCV64_T3 + 1 ), + REGISTER_RISCV64_T5 = ( REGISTER_RISCV64_T4 + 1 ), + REGISTER_RISCV64_T6 = ( REGISTER_RISCV64_T5 + 1 ), + REGISTER_RISCV64_F0 = ( REGISTER_RISCV64_T6 + 1 ), + REGISTER_RISCV64_F1 = ( REGISTER_RISCV64_F0 + 1 ), + REGISTER_RISCV64_F2 = ( REGISTER_RISCV64_F1 + 1 ), + REGISTER_RISCV64_F3 = ( REGISTER_RISCV64_F2 + 1 ), + REGISTER_RISCV64_F4 = ( REGISTER_RISCV64_F3 + 1 ), + REGISTER_RISCV64_F5 = ( REGISTER_RISCV64_F4 + 1 ), + REGISTER_RISCV64_F6 = ( REGISTER_RISCV64_F5 + 1 ), + REGISTER_RISCV64_F7 = ( REGISTER_RISCV64_F6 + 1 ), + REGISTER_RISCV64_F8 = ( REGISTER_RISCV64_F7 + 1 ), + REGISTER_RISCV64_F9 = ( REGISTER_RISCV64_F8 + 1 ), + REGISTER_RISCV64_F10 = ( REGISTER_RISCV64_F9 + 1 ), + REGISTER_RISCV64_F11 = ( REGISTER_RISCV64_F10 + 1 ), + REGISTER_RISCV64_F12 = ( REGISTER_RISCV64_F11 + 1 ), + REGISTER_RISCV64_F13 = ( REGISTER_RISCV64_F12 + 1 ), + REGISTER_RISCV64_F14 = ( REGISTER_RISCV64_F13 + 1 ), + REGISTER_RISCV64_F15 = ( REGISTER_RISCV64_F14 + 1 ), + REGISTER_RISCV64_F16 = ( REGISTER_RISCV64_F15 + 1 ), + REGISTER_RISCV64_F17 = ( REGISTER_RISCV64_F16 + 1 ), + REGISTER_RISCV64_F18 = ( REGISTER_RISCV64_F17 + 1 ), + REGISTER_RISCV64_F19 = ( REGISTER_RISCV64_F18 + 1 ), + REGISTER_RISCV64_F20 = ( REGISTER_RISCV64_F19 + 1 ), + REGISTER_RISCV64_F21 = ( REGISTER_RISCV64_F20 + 1 ), + REGISTER_RISCV64_F22 = ( REGISTER_RISCV64_F21 + 1 ), + REGISTER_RISCV64_F23 = ( REGISTER_RISCV64_F22 + 1 ), + REGISTER_RISCV64_F24 = ( REGISTER_RISCV64_F23 + 1 ), + REGISTER_RISCV64_F25 = ( REGISTER_RISCV64_F24 + 1 ), + REGISTER_RISCV64_F26 = ( REGISTER_RISCV64_F25 + 1 ), + REGISTER_RISCV64_F27 = ( REGISTER_RISCV64_F26 + 1 ), + REGISTER_RISCV64_F28 = ( REGISTER_RISCV64_F27 + 1 ), + REGISTER_RISCV64_F29 = ( REGISTER_RISCV64_F28 + 1 ), + REGISTER_RISCV64_F30 = ( REGISTER_RISCV64_F29 + 1 ), + REGISTER_RISCV64_F31 = ( REGISTER_RISCV64_F30 + 1 ), } CorDebugRegister; diff --git a/src/coreclr/pal/src/arch/riscv64/asmconstants.h b/src/coreclr/pal/src/arch/riscv64/asmconstants.h index 015ac39c13fc9f..a3bd635dc26ce8 100644 --- a/src/coreclr/pal/src/arch/riscv64/asmconstants.h +++ b/src/coreclr/pal/src/arch/riscv64/asmconstants.h @@ -7,9 +7,9 @@ // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/2d865a2964fe06bfc569ab00c74e152b582ed764/riscv-dwarf.adoc // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/2d865a2964fe06bfc569ab00c74e152b582ed764/riscv-cc.adoc -#error "TODO-RISCV64: review this when other files are ported in this directory" +// #error "TODO-RISCV64: review this when other files are ported in this directory" -#define CONTEXT_RISCV64 0x04000000L +#define CONTEXT_RISCV64 0x01000000L #define CONTEXT_CONTROL_BIT (0) #define CONTEXT_INTEGER_BIT (1) @@ -23,51 +23,84 @@ #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) -#define SIZEOF_RISCV64_INTR 8 +#define SIZEOF_RISCV64_GPR 8 #define SIZEOF_RISCV64_FPR 8 -#define CONTEXT_X0 0 // hardwired zero -#define CONTEXT_X1 CONTEXT_X0 + SIZEOF_RISCV64_INTR -#define CONTEXT_X2 CONTEXT_X1 + SIZEOF_RISCV64_INTR -#define CONTEXT_X3 
CONTEXT_X2 + SIZEOF_RISCV64_INTR -#define CONTEXT_X4 CONTEXT_X3 + SIZEOF_RISCV64_INTR -#define CONTEXT_X5 CONTEXT_X4 + SIZEOF_RISCV64_INTR -#define CONTEXT_X6 CONTEXT_X5 + SIZEOF_RISCV64_INTR -#define CONTEXT_X7 CONTEXT_X6 + SIZEOF_RISCV64_INTR -#define CONTEXT_X8 CONTEXT_X7 + SIZEOF_RISCV64_INTR -#define CONTEXT_X9 CONTEXT_X8 + SIZEOF_RISCV64_INTR -#define CONTEXT_X10 CONTEXT_X9 + SIZEOF_RISCV64_INTR -#define CONTEXT_X11 CONTEXT_X10 + SIZEOF_RISCV64_INTR -#define CONTEXT_X12 CONTEXT_X11 + SIZEOF_RISCV64_INTR -#define CONTEXT_X13 CONTEXT_X12 + SIZEOF_RISCV64_INTR -#define CONTEXT_X14 CONTEXT_X13 + SIZEOF_RISCV64_INTR -#define CONTEXT_X15 CONTEXT_X14 + SIZEOF_RISCV64_INTR -#define CONTEXT_X16 CONTEXT_X15 + SIZEOF_RISCV64_INTR -#define CONTEXT_X17 CONTEXT_X16 + SIZEOF_RISCV64_INTR -#define CONTEXT_X18 CONTEXT_X17 + SIZEOF_RISCV64_INTR -#define CONTEXT_X19 CONTEXT_X18 + SIZEOF_RISCV64_INTR -#define CONTEXT_X20 CONTEXT_X19 + SIZEOF_RISCV64_INTR -#define CONTEXT_X21 CONTEXT_X20 + SIZEOF_RISCV64_INTR -#define CONTEXT_X22 CONTEXT_X21 + SIZEOF_RISCV64_INTR -#define CONTEXT_X23 CONTEXT_X22 + SIZEOF_RISCV64_INTR -#define CONTEXT_X24 CONTEXT_X23 + SIZEOF_RISCV64_INTR -#define CONTEXT_X25 CONTEXT_X24 + SIZEOF_RISCV64_INTR -#define CONTEXT_X26 CONTEXT_X25 + SIZEOF_RISCV64_INTR -#define CONTEXT_X27 CONTEXT_X26 + SIZEOF_RISCV64_INTR -#define CONTEXT_X28 CONTEXT_X27 + SIZEOF_RISCV64_INTR -#define CONTEXT_X29 CONTEXT_X28 + SIZEOF_RISCV64_INTR -#define CONTEXT_X30 CONTEXT_X29 + SIZEOF_RISCV64_INTR -#define CONTEXT_X31 CONTEXT_X30 + SIZEOF_RISCV64_INTR - -#define CONTEXT_Pc CONTEXT_X31 + SIZEOF_RISCV64_INTR -#define CONTEXT_FPU_OFFSET CONTEXT_Pc + SIZEOF_RISCV64_INTR +#define CONTEXT_ContextFlags 0 +#define CONTEXT_X0 CONTEXT_ContextFlags + SIZEOF_RISCV64_GPR // hardwired zero +#define CONTEXT_X1 CONTEXT_X0 + SIZEOF_RISCV64_GPR +#define CONTEXT_X2 CONTEXT_X1 + SIZEOF_RISCV64_GPR +#define CONTEXT_X3 CONTEXT_X2 + SIZEOF_RISCV64_GPR +#define CONTEXT_X4 CONTEXT_X3 + SIZEOF_RISCV64_GPR +#define CONTEXT_X5 CONTEXT_X4 + SIZEOF_RISCV64_GPR +#define CONTEXT_X6 CONTEXT_X5 + SIZEOF_RISCV64_GPR +#define CONTEXT_X7 CONTEXT_X6 + SIZEOF_RISCV64_GPR +#define CONTEXT_X8 CONTEXT_X7 + SIZEOF_RISCV64_GPR +#define CONTEXT_X9 CONTEXT_X8 + SIZEOF_RISCV64_GPR +#define CONTEXT_X10 CONTEXT_X9 + SIZEOF_RISCV64_GPR +#define CONTEXT_X11 CONTEXT_X10 + SIZEOF_RISCV64_GPR +#define CONTEXT_X12 CONTEXT_X11 + SIZEOF_RISCV64_GPR +#define CONTEXT_X13 CONTEXT_X12 + SIZEOF_RISCV64_GPR +#define CONTEXT_X14 CONTEXT_X13 + SIZEOF_RISCV64_GPR +#define CONTEXT_X15 CONTEXT_X14 + SIZEOF_RISCV64_GPR +#define CONTEXT_X16 CONTEXT_X15 + SIZEOF_RISCV64_GPR +#define CONTEXT_X17 CONTEXT_X16 + SIZEOF_RISCV64_GPR +#define CONTEXT_X18 CONTEXT_X17 + SIZEOF_RISCV64_GPR +#define CONTEXT_X19 CONTEXT_X18 + SIZEOF_RISCV64_GPR +#define CONTEXT_X20 CONTEXT_X19 + SIZEOF_RISCV64_GPR +#define CONTEXT_X21 CONTEXT_X20 + SIZEOF_RISCV64_GPR +#define CONTEXT_X22 CONTEXT_X21 + SIZEOF_RISCV64_GPR +#define CONTEXT_X23 CONTEXT_X22 + SIZEOF_RISCV64_GPR +#define CONTEXT_X24 CONTEXT_X23 + SIZEOF_RISCV64_GPR +#define CONTEXT_X25 CONTEXT_X24 + SIZEOF_RISCV64_GPR +#define CONTEXT_X26 CONTEXT_X25 + SIZEOF_RISCV64_GPR +#define CONTEXT_X27 CONTEXT_X26 + SIZEOF_RISCV64_GPR +#define CONTEXT_X28 CONTEXT_X27 + SIZEOF_RISCV64_GPR +#define CONTEXT_X29 CONTEXT_X28 + SIZEOF_RISCV64_GPR +#define CONTEXT_X30 CONTEXT_X29 + SIZEOF_RISCV64_GPR +#define CONTEXT_X31 CONTEXT_X30 + SIZEOF_RISCV64_GPR + +#define CONTEXT_Pc CONTEXT_X31 + SIZEOF_RISCV64_GPR +#define CONTEXT_FPU_OFFSET CONTEXT_Pc + 
SIZEOF_RISCV64_GPR
+
+#define CONTEXT_Ra CONTEXT_X1
 #define CONTEXT_Sp CONTEXT_X2
+#define CONTEXT_Gp CONTEXT_X3
 #define CONTEXT_Tp CONTEXT_X4
 #define CONTEXT_Fp CONTEXT_X8
+#define CONTEXT_S0 CONTEXT_X8
+#define CONTEXT_S1 CONTEXT_X9
+#define CONTEXT_S2 CONTEXT_X18
+#define CONTEXT_S3 CONTEXT_X19
+#define CONTEXT_S4 CONTEXT_X20
+#define CONTEXT_S5 CONTEXT_X21
+#define CONTEXT_S6 CONTEXT_X22
+#define CONTEXT_S7 CONTEXT_X23
+#define CONTEXT_S8 CONTEXT_X24
+#define CONTEXT_S9 CONTEXT_X25
+#define CONTEXT_S10 CONTEXT_X26
+#define CONTEXT_S11 CONTEXT_X27
+
+#define CONTEXT_A0 CONTEXT_X10
+#define CONTEXT_A1 CONTEXT_X11
+#define CONTEXT_A2 CONTEXT_X12
+#define CONTEXT_A3 CONTEXT_X13
+#define CONTEXT_A4 CONTEXT_X14
+#define CONTEXT_A5 CONTEXT_X15
+#define CONTEXT_A6 CONTEXT_X16
+#define CONTEXT_A7 CONTEXT_X17
+
+#define CONTEXT_T0 CONTEXT_X5
+#define CONTEXT_T1 CONTEXT_X6
+#define CONTEXT_T2 CONTEXT_X7
+#define CONTEXT_T3 CONTEXT_X28
+#define CONTEXT_T4 CONTEXT_X29
+#define CONTEXT_T5 CONTEXT_X30
+#define CONTEXT_T6 CONTEXT_X31
+
 #define CONTEXT_F0 0
-#define CONTEXT_F1 CONTEXT_F1 + SIZEOF_RISCV64_FPR
-#define CONTEXT_F2 CONTEXT_F2 + SIZEOF_RISCV64_FPR
+#define CONTEXT_F1 CONTEXT_F0 + SIZEOF_RISCV64_FPR
+#define CONTEXT_F2 CONTEXT_F1 + SIZEOF_RISCV64_FPR
 #define CONTEXT_F3 CONTEXT_F2 + SIZEOF_RISCV64_FPR
 #define CONTEXT_F4 CONTEXT_F3 + SIZEOF_RISCV64_FPR
 #define CONTEXT_F5 CONTEXT_F4 + SIZEOF_RISCV64_FPR
diff --git a/src/coreclr/pal/src/arch/riscv64/callsignalhandlerwrapper.S b/src/coreclr/pal/src/arch/riscv64/callsignalhandlerwrapper.S
index a7cd5b6c4d2403..89962f0b2549f6 100644
--- a/src/coreclr/pal/src/arch/riscv64/callsignalhandlerwrapper.S
+++ b/src/coreclr/pal/src/arch/riscv64/callsignalhandlerwrapper.S
@@ -4,4 +4,32 @@
 #include "unixasmmacros.inc"
 #include "asmconstants.h"
-#error "TODO-RISCV64: missing implementation"
+.macro CALL_SIGNAL_HANDLER_WRAPPER Alignment
+
+.globl C_FUNC(SignalHandlerWorkerReturnOffset\Alignment)
+C_FUNC(SignalHandlerWorkerReturnOffset\Alignment):
+    .dword LOCAL_LABEL(SignalHandlerWorkerReturn\Alignment)-C_FUNC(CallSignalHandlerWrapper\Alignment)
+
+// This function is never called, only a fake stack frame will be setup to have a return
+// address set to SignalHandlerWorkerReturn during SIGSEGV handling.
+// It enables the unwinder to unwind stack from the handling code to the actual failure site.
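+//
+// Two copies of this wrapper are emitted below (CALL_SIGNAL_HANDLER_WRAPPER 0 and 8);
+// ExecuteHandlerOnCustomStack in signalhandlerhelper.cpp picks SignalHandlerWorkerReturnOffset0
+// or SignalHandlerWorkerReturnOffset8 depending on the alignment of the faulting SP, so the
+// fake frame stays 16-byte aligned in either case.
+//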
+NESTED_ENTRY CallSignalHandlerWrapper\Alignment, _TEXT, NoHandler +__StackAllocationSize = (128 + 8 + 8 + \Alignment) // red zone + fp + ra + alignment + PROLOG_STACK_ALLOC __StackAllocationSize + .cfi_adjust_cfa_offset __StackAllocationSize + + PROLOG_SAVE_REG_PAIR fp, ra, 0 + + call signal_handler_worker + +LOCAL_LABEL(SignalHandlerWorkerReturn\Alignment): + EPILOG_RESTORE_REG_PAIR fp, ra, 0 + EPILOG_STACK_FREE __StackAllocationSize + jalr x0, ra, 0 + +NESTED_END CallSignalHandlerWrapper\Alignment, _TEXT + +.endm + +CALL_SIGNAL_HANDLER_WRAPPER 0 +CALL_SIGNAL_HANDLER_WRAPPER 8 diff --git a/src/coreclr/pal/src/arch/riscv64/context2.S b/src/coreclr/pal/src/arch/riscv64/context2.S index 8e3a01ac4f608a..c172ec992cf7ff 100644 --- a/src/coreclr/pal/src/arch/riscv64/context2.S +++ b/src/coreclr/pal/src/arch/riscv64/context2.S @@ -9,4 +9,246 @@ #include "unixasmmacros.inc" #include "asmconstants.h" -#error "TODO-RISCV64: missing implementation" +// Incoming: +// a0: Context* +// a1: Exception* +// +LEAF_ENTRY RtlRestoreContext, _TEXT +#ifdef HAS_ASAN + lw t1, CONTEXT_ContextFlags(a0) + andi t1, t1, 0x1 << CONTEXT_FLOATING_POINT_BIT + beq t1, zero, LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT) + + addi sp, sp, -16 + sd a0, 0(sp) + sd a1, 8(sp) + + call __asan_handle_no_return + + ld a0, 0(sp) + ld a1, 8(sp) + addi sp, sp, 16 + +LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT): +#endif + + addi t4, a0, 0 + lw t1, CONTEXT_ContextFlags(t4) + andi t1, t1, 0x1 << CONTEXT_FLOATING_POINT_BIT + beq t1, zero, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT) + + //64-bits FPR. + addi t0, t4, CONTEXT_FPU_OFFSET + + fld f0, (CONTEXT_F0)(t0) + fld f1, (CONTEXT_F1)(t0) + fld f2, (CONTEXT_F2)(t0) + fld f3, (CONTEXT_F3)(t0) + fld f4, (CONTEXT_F4)(t0) + fld f5, (CONTEXT_F5)(t0) + fld f6, (CONTEXT_F6)(t0) + fld f7, (CONTEXT_F7)(t0) + fld f8, (CONTEXT_F8)(t0) + fld f9, (CONTEXT_F9)(t0) + fld f10, (CONTEXT_F10)(t0) + fld f11, (CONTEXT_F11)(t0) + fld f12, (CONTEXT_F12)(t0) + fld f13, (CONTEXT_F13)(t0) + fld f14, (CONTEXT_F14)(t0) + fld f15, (CONTEXT_F15)(t0) + fld f16, (CONTEXT_F16)(t0) + fld f17, (CONTEXT_F17)(t0) + fld f18, (CONTEXT_F18)(t0) + fld f19, (CONTEXT_F19)(t0) + fld f20, (CONTEXT_F20)(t0) + fld f21, (CONTEXT_F21)(t0) + fld f22, (CONTEXT_F22)(t0) + fld f23, (CONTEXT_F23)(t0) + fld f24, (CONTEXT_F24)(t0) + fld f25, (CONTEXT_F25)(t0) + fld f26, (CONTEXT_F26)(t0) + fld f27, (CONTEXT_F27)(t0) + fld f28, (CONTEXT_F28)(t0) + fld f29, (CONTEXT_F29)(t0) + fld f30, (CONTEXT_F30)(t0) + fld f31, (CONTEXT_F31)(t0) + + lw t1, (CONTEXT_FLOAT_CONTROL_OFFSET)(t0) + fscsr x0, t1 + +LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT): + + lw t1, CONTEXT_ContextFlags(t4) + andi t1, t1, 0x1 << CONTEXT_INTEGER_BIT + beq t1, zero, LOCAL_LABEL(No_Restore_CONTEXT_INTEGER) + + ld tp, (CONTEXT_Tp)(a0) + ld gp, (CONTEXT_Gp)(a0) + ld a1, (CONTEXT_A1)(a0) + ld a2, (CONTEXT_A2)(a0) + ld a3, (CONTEXT_A3)(a0) + ld a4, (CONTEXT_A4)(a0) + ld a5, (CONTEXT_A5)(a0) + ld a6, (CONTEXT_A6)(a0) + ld a7, (CONTEXT_A7)(a0) + ld t0, (CONTEXT_T0)(a0) + ld t1, (CONTEXT_T1)(a0) + ld t2, (CONTEXT_T2)(a0) + ld t3, (CONTEXT_T3)(a0) + ld t5, (CONTEXT_T5)(a0) + ld t6, (CONTEXT_T6)(a0) + + ld s1, (CONTEXT_S1)(a0) + ld s2, (CONTEXT_S2)(a0) + ld s3, (CONTEXT_S3)(a0) + ld s4, (CONTEXT_S4)(a0) + ld s5, (CONTEXT_S5)(a0) + ld s6, (CONTEXT_S6)(a0) + ld s7, (CONTEXT_S7)(a0) + ld s8, (CONTEXT_S8)(a0) + ld s9, (CONTEXT_S9)(a0) + ld s10, (CONTEXT_S10)(a0) + ld s11, (CONTEXT_S11)(a0) + + ld a0, (CONTEXT_A0)(a0) + +LOCAL_LABEL(No_Restore_CONTEXT_INTEGER): + + lw t1, 
CONTEXT_ContextFlags(t4) + andi t1, t1, 0x1 << CONTEXT_CONTROL_BIT + beq t1, zero, LOCAL_LABEL(No_Restore_CONTEXT_CONTROL) + + ld ra, (CONTEXT_Ra)(t4) + ld fp, (CONTEXT_Fp)(t4) + ld sp, (CONTEXT_Sp)(t4) + ld t1, (CONTEXT_Pc)(t4) // TODO t1 is not restored. + ld t4, (CONTEXT_T4)(t4) + jalr x0, t1, 0 + +LOCAL_LABEL(No_Restore_CONTEXT_CONTROL): + ld t4, (CONTEXT_T4)(t4) + jalr x0, ra, 0 +LEAF_END RtlRestoreContext, _TEXT + +// Incoming: +// a0: Context* + +LEAF_ENTRY RtlCaptureContext, _TEXT + PROLOG_STACK_ALLOC 16 + sd t1, 0(sp) + li t1, CONTEXT_FULL + sw t1, CONTEXT_ContextFlags(a0) + ld t1, 0(sp) + EPILOG_STACK_FREE 16 + tail CONTEXT_CaptureContext +LEAF_END RtlCaptureContext, _TEXT + +// Incoming: +// a0: Context* +// + +LEAF_ENTRY CONTEXT_CaptureContext, _TEXT + PROLOG_STACK_ALLOC 24 + sd t0, 0(sp) + sd t1, 8(sp) + sd t3, 16(sp) + + lw t1, CONTEXT_ContextFlags(a0) + li t0, CONTEXT_CONTROL + and t3, t1, t0 + bne t3, t0, LOCAL_LABEL(Done_CONTEXT_CONTROL) + + addi t0, sp, 24 + sd fp, CONTEXT_Fp(a0) + sd t0, CONTEXT_Sp(a0) + sd ra, CONTEXT_Ra(a0) + sd ra, CONTEXT_Pc(a0) + +LOCAL_LABEL(Done_CONTEXT_CONTROL): + + li t0, CONTEXT_INTEGER + and t3, t1, t0 + bne t3, t0, LOCAL_LABEL(Done_CONTEXT_INTEGER) + + ld t0, 0(sp) + ld t1, 8(sp) + ld t3, 16(sp) + + sd tp, (CONTEXT_Tp)(a0) + sd gp, (CONTEXT_Gp)(a0) + sd a1, (CONTEXT_A1)(a0) + sd a2, (CONTEXT_A2)(a0) + sd a3, (CONTEXT_A3)(a0) + sd a4, (CONTEXT_A4)(a0) + sd a5, (CONTEXT_A5)(a0) + sd a6, (CONTEXT_A6)(a0) + sd a7, (CONTEXT_A7)(a0) + sd t0, (CONTEXT_T0)(a0) + sd t1, (CONTEXT_T1)(a0) + sd t2, (CONTEXT_T2)(a0) + sd t3, (CONTEXT_T3)(a0) + sd t5, (CONTEXT_T5)(a0) + sd t6, (CONTEXT_T6)(a0) + + sd s1, (CONTEXT_S1)(a0) + sd s2, (CONTEXT_S2)(a0) + sd s3, (CONTEXT_S3)(a0) + sd s4, (CONTEXT_S4)(a0) + sd s5, (CONTEXT_S5)(a0) + sd s6, (CONTEXT_S6)(a0) + sd s7, (CONTEXT_S7)(a0) + sd s8, (CONTEXT_S8)(a0) + sd s9, (CONTEXT_S9)(a0) + sd s10, (CONTEXT_S10)(a0) + sd s11, (CONTEXT_S11)(a0) + +LOCAL_LABEL(Done_CONTEXT_INTEGER): + lw t1, CONTEXT_ContextFlags(a0) + + li t0, CONTEXT_FLOATING_POINT + and t3, t1, t0 + bne t3, t0, LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT) + + addi a0, a0, CONTEXT_FPU_OFFSET + + fsd f0, (CONTEXT_F0)(a0) + fsd f1, (CONTEXT_F1)(a0) + fsd f2, (CONTEXT_F2)(a0) + fsd f3, (CONTEXT_F3)(a0) + fsd f4, (CONTEXT_F4)(a0) + fsd f5, (CONTEXT_F5)(a0) + fsd f6, (CONTEXT_F6)(a0) + fsd f7, (CONTEXT_F7)(a0) + fsd f8, (CONTEXT_F8)(a0) + fsd f9, (CONTEXT_F9)(a0) + fsd f10, (CONTEXT_F10)(a0) + fsd f11, (CONTEXT_F11)(a0) + fsd f12, (CONTEXT_F12)(a0) + fsd f13, (CONTEXT_F13)(a0) + fsd f14, (CONTEXT_F14)(a0) + fsd f15, (CONTEXT_F15)(a0) + fsd f16, (CONTEXT_F16)(a0) + fsd f17, (CONTEXT_F17)(a0) + fsd f18, (CONTEXT_F18)(a0) + fsd f19, (CONTEXT_F19)(a0) + fsd f20, (CONTEXT_F20)(a0) + fsd f21, (CONTEXT_F21)(a0) + fsd f22, (CONTEXT_F22)(a0) + fsd f23, (CONTEXT_F23)(a0) + fsd f24, (CONTEXT_F24)(a0) + fsd f25, (CONTEXT_F25)(a0) + fsd f26, (CONTEXT_F26)(a0) + fsd f27, (CONTEXT_F27)(a0) + fsd f28, (CONTEXT_F28)(a0) + fsd f29, (CONTEXT_F29)(a0) + fsd f30, (CONTEXT_F30)(a0) + fsd f31, (CONTEXT_F31)(a0) + + frcsr t0 + sd t0, (CONTEXT_FLOAT_CONTROL_OFFSET)(a0) + +LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): + + EPILOG_STACK_FREE 24 + jalr x0, ra, 0 +LEAF_END CONTEXT_CaptureContext, _TEXT diff --git a/src/coreclr/pal/src/arch/riscv64/dispatchexceptionwrapper.S b/src/coreclr/pal/src/arch/riscv64/dispatchexceptionwrapper.S deleted file mode 100644 index 41f5e08472dfff..00000000000000 --- a/src/coreclr/pal/src/arch/riscv64/dispatchexceptionwrapper.S +++ /dev/null @@ -1,13 +0,0 @@ -// 
Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-// Implementation of the PAL_DispatchExceptionWrapper that is
-// interposed between a function that caused a hardware fault
-// and PAL_DispatchException that throws an SEH exception for
-// the fault, to make the stack unwindable.
-//
-
-#include "unixasmmacros.inc"
-
-#error "TODO-RISCV64: missing implementation"
diff --git a/src/coreclr/pal/src/arch/riscv64/exceptionhelper.S b/src/coreclr/pal/src/arch/riscv64/exceptionhelper.S
index a7cd5b6c4d2403..ee0e7bde62b291 100644
--- a/src/coreclr/pal/src/arch/riscv64/exceptionhelper.S
+++ b/src/coreclr/pal/src/arch/riscv64/exceptionhelper.S
@@ -4,4 +4,45 @@
 #include "unixasmmacros.inc"
 #include "asmconstants.h"
-#error "TODO-RISCV64: missing implementation"
+//////////////////////////////////////////////////////////////////////////
+//
+// This function creates a stack frame right below the target frame, restores all callee
+// saved registers, SP, and RA from the passed in context.
+// Then it uses the ThrowExceptionHelper to throw the passed in exception from that context.
+// EXTERN_C void ThrowExceptionFromContextInternal(CONTEXT* context, PAL_SEHException* ex);
+LEAF_ENTRY ThrowExceptionFromContextInternal, _TEXT
+#ifdef HAS_ASAN
+#pragma error("LLVM v3.9 ASAN unimplemented on RISCV64 yet")
+#endif
+    addi sp, sp, -16
+    .cfi_adjust_cfa_offset 16
+
+    // Save the FP & RA to the stack so that the unwind can work at the instruction after
+    // loading the FP from the context, but before loading the SP from the context.
+    sd fp, 0(sp)
+    sd ra, 8(sp)
+    .cfi_rel_offset fp, 0
+    .cfi_rel_offset ra, 8
+
+    ld tp, CONTEXT_Tp(a0)
+    ld gp, CONTEXT_Gp(a0)
+    ld s1, CONTEXT_S1(a0)
+    ld s2, CONTEXT_S2(a0)
+    ld s3, CONTEXT_S3(a0)
+    ld s4, CONTEXT_S4(a0)
+    ld s5, CONTEXT_S5(a0)
+    ld s6, CONTEXT_S6(a0)
+    ld s7, CONTEXT_S7(a0)
+    ld s8, CONTEXT_S8(a0)
+    ld s9, CONTEXT_S9(a0)
+    ld s10, CONTEXT_S10(a0)
+    ld s11, CONTEXT_S11(a0)
+    ld ra, CONTEXT_Ra(a0)
+
+    ld fp, CONTEXT_Fp(a0)
+    ld sp, CONTEXT_Sp(a0)
+
+    // The PAL_SEHException pointer
+    addi a0, a1, 0
+    jal x0, ThrowExceptionHelper
+LEAF_END ThrowExceptionFromContextInternal, _TEXT
diff --git a/src/coreclr/pal/src/arch/riscv64/signalhandlerhelper.cpp b/src/coreclr/pal/src/arch/riscv64/signalhandlerhelper.cpp
index a4ce803b47afdb..1ad1d7b0d041c5 100644
--- a/src/coreclr/pal/src/arch/riscv64/signalhandlerhelper.cpp
+++ b/src/coreclr/pal/src/arch/riscv64/signalhandlerhelper.cpp
@@ -26,5 +26,48 @@ Parameters :
 --*/
 void ExecuteHandlerOnCustomStack(int code, siginfo_t *siginfo, void *context, size_t customSp, SignalHandlerWorkerReturnPoint* returnPoint)
 {
-#error "TODO-RISCV64: missing implementation"
+    ucontext_t *ucontext = (ucontext_t *)context;
+    size_t faultSp = (size_t)MCREG_Sp(ucontext->uc_mcontext);
+    _ASSERTE(IS_ALIGNED(faultSp, 8));
+
+    if (customSp == 0)
+    {
+        // preserve 128 bytes long red zone and align stack pointer
+        customSp = ALIGN_DOWN(faultSp - 128, 16);
+    }
+
+    size_t fakeFrameReturnAddress;
+
+    if (IS_ALIGNED(faultSp, 16))
+    {
+        fakeFrameReturnAddress = (size_t)SignalHandlerWorkerReturnOffset0 + (size_t)CallSignalHandlerWrapper0;
+    }
+    else
+    {
+        fakeFrameReturnAddress = (size_t)SignalHandlerWorkerReturnOffset8 + (size_t)CallSignalHandlerWrapper8;
+    }
+
+    // preserve 128 bytes long red zone and align stack pointer
+    size_t* sp = (size_t*)customSp;
+
+    // Build fake stack frame to enable the stack unwinder to unwind from signal_handler_worker to the faulting instruction
+    // pushed RA
+    *--sp = (size_t)MCREG_Pc(ucontext->uc_mcontext);
+    // pushed frame pointer
+    *--sp = (size_t)MCREG_Fp(ucontext->uc_mcontext);
+
+    // Switch the current context to the signal_handler_worker and the original stack
+    CONTEXT context2;
+    RtlCaptureContext(&context2);
+
+    context2.Sp = (size_t)sp;
+    context2.Fp = (size_t)sp;
+    context2.Ra = fakeFrameReturnAddress;
+    context2.Pc = (size_t)signal_handler_worker;
+    context2.A0 = code;
+    context2.A1 = (size_t)siginfo;
+    context2.A2 = (size_t)context;
+    context2.A3 = (size_t)returnPoint;
+
+    RtlRestoreContext(&context2, NULL);
 }
diff --git a/src/coreclr/pal/src/exception/remote-unwind.cpp b/src/coreclr/pal/src/exception/remote-unwind.cpp
index 22c72eeb709f08..0a6aba1fa3fc05 100644
--- a/src/coreclr/pal/src/exception/remote-unwind.cpp
+++ b/src/coreclr/pal/src/exception/remote-unwind.cpp
@@ -124,7 +124,7 @@ typedef BOOL(*UnwindReadMemoryCallback)(PVOID address, PVOID buffer, SIZE_T size
 #define PRId PRId32
 #define PRIA "08"
 #define PRIxA PRIA PRIx
-#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64)
+#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64) || defined(TARGET_RISCV64)
 #define PRIx PRIx64
 #define PRIu PRIu64
 #define PRId PRId64
@@ -1895,6 +1895,22 @@ static void GetContextPointers(unw_cursor_t *cursor, unw_context_t *unwContext,
     GetContextPointer(cursor, unwContext, UNW_PPC64_R29, (SIZE_T **)&contextPointers->R29);
     GetContextPointer(cursor, unwContext, UNW_PPC64_R30, (SIZE_T **)&contextPointers->R30);
     GetContextPointer(cursor, unwContext, UNW_PPC64_R31, (SIZE_T **)&contextPointers->R31);
+#elif defined(TARGET_RISCV64)
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X1, (SIZE_T **)&contextPointers->Ra);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X3, (SIZE_T **)&contextPointers->Gp);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X4, (SIZE_T **)&contextPointers->Tp);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X8, (SIZE_T **)&contextPointers->Fp);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X9, (SIZE_T **)&contextPointers->S1);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X18, (SIZE_T **)&contextPointers->S2);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X19, (SIZE_T **)&contextPointers->S3);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X20, (SIZE_T **)&contextPointers->S4);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X21, (SIZE_T **)&contextPointers->S5);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X22, (SIZE_T **)&contextPointers->S6);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X23, (SIZE_T **)&contextPointers->S7);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X24, (SIZE_T **)&contextPointers->S8);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X25, (SIZE_T **)&contextPointers->S9);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X26, (SIZE_T **)&contextPointers->S10);
+    GetContextPointer(cursor, unwContext, UNW_RISCV_X27, (SIZE_T **)&contextPointers->S11);
 #else
 #error unsupported architecture
 #endif
@@ -2003,6 +2019,25 @@ static void UnwindContextToContext(unw_cursor_t *cursor, CONTEXT *winContext)
     unw_get_reg(cursor, UNW_PPC64_R28, (unw_word_t *) &winContext->R28);
     unw_get_reg(cursor, UNW_PPC64_R29, (unw_word_t *) &winContext->R29);
     unw_get_reg(cursor, UNW_PPC64_R30, (unw_word_t *) &winContext->R30);
+#elif defined(TARGET_RISCV64)
+    unw_get_reg(cursor, UNW_REG_IP, 
(unw_word_t *) &winContext->Pc); + unw_get_reg(cursor, UNW_REG_SP, (unw_word_t *) &winContext->Sp); + unw_get_reg(cursor, UNW_RISCV_X1, (unw_word_t *) &winContext->Ra); + unw_get_reg(cursor, UNW_RISCV_X3, (unw_word_t *) &winContext->Gp); + unw_get_reg(cursor, UNW_RISCV_X4, (unw_word_t *) &winContext->Tp); + unw_get_reg(cursor, UNW_RISCV_X8, (unw_word_t *) &winContext->Fp); + unw_get_reg(cursor, UNW_RISCV_X9, (unw_word_t *) &winContext->S1); + unw_get_reg(cursor, UNW_RISCV_X18, (unw_word_t *) &winContext->S2); + unw_get_reg(cursor, UNW_RISCV_X19, (unw_word_t *) &winContext->S3); + unw_get_reg(cursor, UNW_RISCV_X20, (unw_word_t *) &winContext->S4); + unw_get_reg(cursor, UNW_RISCV_X21, (unw_word_t *) &winContext->S5); + unw_get_reg(cursor, UNW_RISCV_X22, (unw_word_t *) &winContext->S6); + unw_get_reg(cursor, UNW_RISCV_X23, (unw_word_t *) &winContext->S7); + unw_get_reg(cursor, UNW_RISCV_X24, (unw_word_t *) &winContext->S8); + unw_get_reg(cursor, UNW_RISCV_X25, (unw_word_t *) &winContext->S9); + unw_get_reg(cursor, UNW_RISCV_X26, (unw_word_t *) &winContext->S10); + unw_get_reg(cursor, UNW_RISCV_X27, (unw_word_t *) &winContext->S11); + TRACE("sp %p gp %p fp %p tp %p ra %p\n", winContext->Sp, winContext->Gp, winContext->Fp, winContext->Tp, winContext->Ra); #else #error unsupported architecture #endif @@ -2138,6 +2173,23 @@ access_reg(unw_addr_space_t as, unw_regnum_t regnum, unw_word_t *valp, int write case UNW_PPC64_R30: *valp = (unw_word_t)winContext->R30; break; case UNW_PPC64_R31: *valp = (unw_word_t)winContext->R31; break; case UNW_PPC64_NIP: *valp = (unw_word_t)winContext->Nip; break; +#elif defined(TARGET_RISCV64) + case UNW_RISCV_X1: *valp = (unw_word_t)winContext->Ra; break; + case UNW_RISCV_X3: *valp = (unw_word_t)winContext->Gp; break; + case UNW_RISCV_X4: *valp = (unw_word_t)winContext->Tp; break; + case UNW_RISCV_X8: *valp = (unw_word_t)winContext->Fp; break; + case UNW_RISCV_X9: *valp = (unw_word_t)winContext->S1; break; + case UNW_RISCV_X18: *valp = (unw_word_t)winContext->S2; break; + case UNW_RISCV_X19: *valp = (unw_word_t)winContext->S3; break; + case UNW_RISCV_X20: *valp = (unw_word_t)winContext->S4; break; + case UNW_RISCV_X21: *valp = (unw_word_t)winContext->S5; break; + case UNW_RISCV_X22: *valp = (unw_word_t)winContext->S6; break; + case UNW_RISCV_X23: *valp = (unw_word_t)winContext->S7; break; + case UNW_RISCV_X24: *valp = (unw_word_t)winContext->S8; break; + case UNW_RISCV_X25: *valp = (unw_word_t)winContext->S9; break; + case UNW_RISCV_X26: *valp = (unw_word_t)winContext->S10; break; + case UNW_RISCV_X27: *valp = (unw_word_t)winContext->S11; break; + case UNW_RISCV_PC: *valp = (unw_word_t)winContext->Pc; break; #else #error unsupported architecture #endif diff --git a/src/coreclr/pal/src/exception/seh-unwind.cpp b/src/coreclr/pal/src/exception/seh-unwind.cpp index b3a4ca23726bb2..679bb29161a0e7 100644 --- a/src/coreclr/pal/src/exception/seh-unwind.cpp +++ b/src/coreclr/pal/src/exception/seh-unwind.cpp @@ -154,18 +154,17 @@ enum ASSIGN_REG(S8) #elif (defined(HOST_UNIX) && defined(HOST_RISCV64)) -#error "TODO-RISCV64: review this" +// #error "TODO-RISCV64: review this" // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/2d865a2964fe06bfc569ab00c74e152b582ed764/riscv-cc.adoc #define ASSIGN_UNWIND_REGS \ ASSIGN_REG(Ra) \ ASSIGN_REG(Sp) \ - ASSIGN_REG(Sp) \ ASSIGN_REG(Gp) \ ASSIGN_REG(Tp) \ ASSIGN_REG(Pc) \ - ASSIGN_REG(S0) \ + ASSIGN_REG(Fp) \ ASSIGN_REG(S1) \ ASSIGN_REG(S2) \ ASSIGN_REG(S3) \ @@ -473,27 +472,17 @@ void 
UnwindContextToWinContext(unw_cursor_t *cursor, CONTEXT *winContext) unw_get_reg(cursor, UNW_LOONGARCH64_R30, (unw_word_t *) &winContext->S7); unw_get_reg(cursor, UNW_LOONGARCH64_R31, (unw_word_t *) &winContext->S8); #elif (defined(HOST_UNIX) && defined(HOST_RISCV64)) -#error "TODO-RISCV64: review this" +// #error "TODO-RISCV64: review this" // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/2d865a2964fe06bfc569ab00c74e152b582ed764/riscv-cc.adoc unw_get_reg(cursor, UNW_REG_IP, (unw_word_t *) &winContext->Pc); - unw_get_reg(cursor, UNW_RISCV_X1, (unw_word_t *) &winContext->Ra); unw_get_reg(cursor, UNW_REG_SP, (unw_word_t *) &winContext->Sp); + unw_get_reg(cursor, UNW_RISCV_X1, (unw_word_t *) &winContext->Ra); + unw_get_reg(cursor, UNW_RISCV_X3, (unw_word_t *) &winContext->Gp); unw_get_reg(cursor, UNW_RISCV_X4, (unw_word_t *) &winContext->Tp); - unw_get_reg(cursor, UNW_RISCV_X5, (unw_word_t *) &winContext->T0); - unw_get_reg(cursor, UNW_RISCV_X6, (unw_word_t *) &winContext->T1); - unw_get_reg(cursor, UNW_RISCV_X7, (unw_word_t *) &winContext->T2); - unw_get_reg(cursor, UNW_RISCV_X8, (unw_word_t *) &winContext->S0); + unw_get_reg(cursor, UNW_RISCV_X8, (unw_word_t *) &winContext->Fp); unw_get_reg(cursor, UNW_RISCV_X9, (unw_word_t *) &winContext->S1); - unw_get_reg(cursor, UNW_RISCV_X10, (unw_word_t *) &winContext->A0); - unw_get_reg(cursor, UNW_RISCV_X11, (unw_word_t *) &winContext->A1); - unw_get_reg(cursor, UNW_RISCV_X12, (unw_word_t *) &winContext->A2); - unw_get_reg(cursor, UNW_RISCV_X13, (unw_word_t *) &winContext->A3); - unw_get_reg(cursor, UNW_RISCV_X14, (unw_word_t *) &winContext->A4); - unw_get_reg(cursor, UNW_RISCV_X15, (unw_word_t *) &winContext->A5); - unw_get_reg(cursor, UNW_RISCV_X16, (unw_word_t *) &winContext->A6); - unw_get_reg(cursor, UNW_RISCV_X17, (unw_word_t *) &winContext->A7); unw_get_reg(cursor, UNW_RISCV_X18, (unw_word_t *) &winContext->S2); unw_get_reg(cursor, UNW_RISCV_X19, (unw_word_t *) &winContext->S3); unw_get_reg(cursor, UNW_RISCV_X20, (unw_word_t *) &winContext->S4); @@ -504,10 +493,6 @@ void UnwindContextToWinContext(unw_cursor_t *cursor, CONTEXT *winContext) unw_get_reg(cursor, UNW_RISCV_X25, (unw_word_t *) &winContext->S9); unw_get_reg(cursor, UNW_RISCV_X26, (unw_word_t *) &winContext->S10); unw_get_reg(cursor, UNW_RISCV_X27, (unw_word_t *) &winContext->S11); - unw_get_reg(cursor, UNW_RISCV_X28, (unw_word_t *) &winContext->T3); - unw_get_reg(cursor, UNW_RISCV_X29, (unw_word_t *) &winContext->T4); - unw_get_reg(cursor, UNW_RISCV_X30, (unw_word_t *) &winContext->T5); - unw_get_reg(cursor, UNW_RISCV_X31, (unw_word_t *) &winContext->T6); #elif (defined(HOST_UNIX) && defined(HOST_POWERPC64)) unw_get_reg(cursor, UNW_REG_SP, (unw_word_t *) &winContext->R31); unw_get_reg(cursor, UNW_REG_IP, (unw_word_t *) &winContext->Nip); @@ -627,24 +612,15 @@ void GetContextPointers(unw_cursor_t *cursor, unw_context_t *unwContext, KNONVOL GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R30, (SIZE_T **)&contextPointers->S7); GetContextPointer(cursor, unwContext, UNW_LOONGARCH64_R31, (SIZE_T **)&contextPointers->S8); #elif (defined(HOST_UNIX) && defined(HOST_RISCV64)) -#error "TODO-RISCV64: review this" +// #error "TODO-RISCV64: review this" // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/2d865a2964fe06bfc569ab00c74e152b582ed764/riscv-cc.adoc GetContextPointer(cursor, unwContext, UNW_RISCV_X1, (SIZE_T **)&contextPointers->Ra); + GetContextPointer(cursor, unwContext, UNW_RISCV_X3, (SIZE_T **)&contextPointers->Gp); 
GetContextPointer(cursor, unwContext, UNW_RISCV_X4, (SIZE_T **)&contextPointers->Tp); - GetContextPointer(cursor, unwContext, UNW_RISCV_X5, (SIZE_T **)&contextPointers->T0); - GetContextPointer(cursor, unwContext, UNW_RISCV_X7, (SIZE_T **)&contextPointers->T1); - GetContextPointer(cursor, unwContext, UNW_RISCV_X8, (SIZE_T **)&contextPointers->S0); + GetContextPointer(cursor, unwContext, UNW_RISCV_X8, (SIZE_T **)&contextPointers->Fp); GetContextPointer(cursor, unwContext, UNW_RISCV_X9, (SIZE_T **)&contextPointers->S1); - GetContextPointer(cursor, unwContext, UNW_RISCV_X10, (SIZE_T **)&contextPointers->A0); - GetContextPointer(cursor, unwContext, UNW_RISCV_X11, (SIZE_T **)&contextPointers->A1); - GetContextPointer(cursor, unwContext, UNW_RISCV_X12, (SIZE_T **)&contextPointers->A2); - GetContextPointer(cursor, unwContext, UNW_RISCV_X13, (SIZE_T **)&contextPointers->A3); - GetContextPointer(cursor, unwContext, UNW_RISCV_X14, (SIZE_T **)&contextPointers->A4); - GetContextPointer(cursor, unwContext, UNW_RISCV_X15, (SIZE_T **)&contextPointers->A5); - GetContextPointer(cursor, unwContext, UNW_RISCV_X16, (SIZE_T **)&contextPointers->A6); - GetContextPointer(cursor, unwContext, UNW_RISCV_X17, (SIZE_T **)&contextPointers->A7); GetContextPointer(cursor, unwContext, UNW_RISCV_X18, (SIZE_T **)&contextPointers->S2); GetContextPointer(cursor, unwContext, UNW_RISCV_X19, (SIZE_T **)&contextPointers->S3); GetContextPointer(cursor, unwContext, UNW_RISCV_X20, (SIZE_T **)&contextPointers->S4); @@ -655,10 +631,6 @@ void GetContextPointers(unw_cursor_t *cursor, unw_context_t *unwContext, KNONVOL GetContextPointer(cursor, unwContext, UNW_RISCV_X25, (SIZE_T **)&contextPointers->S9); GetContextPointer(cursor, unwContext, UNW_RISCV_X26, (SIZE_T **)&contextPointers->S10); GetContextPointer(cursor, unwContext, UNW_RISCV_X27, (SIZE_T **)&contextPointers->S11); - GetContextPointer(cursor, unwContext, UNW_RISCV_X28, (SIZE_T **)&contextPointers->T3); - GetContextPointer(cursor, unwContext, UNW_RISCV_X29, (SIZE_T **)&contextPointers->T4); - GetContextPointer(cursor, unwContext, UNW_RISCV_X30, (SIZE_T **)&contextPointers->T5); - GetContextPointer(cursor, unwContext, UNW_RISCV_X31, (SIZE_T **)&contextPointers->T6); #elif (defined(HOST_UNIX) && defined(HOST_POWERPC64)) GetContextPointer(cursor, unwContext, UNW_PPC64_R14, (SIZE_T **)&contextPointers->R14); GetContextPointer(cursor, unwContext, UNW_PPC64_R15, (SIZE_T **)&contextPointers->R15); diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index c702ae272a76bd..5fab30aca173c5 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -157,7 +157,7 @@ using asm_sigcontext::_xstate; #define MCREG_Pc(mc) ((mc).__pc) #elif defined(HOST_RISCV64) -#error "TODO-RISCV64: review this" +// #error "TODO-RISCV64: review this" #define MCREG_Ra(mc) ((mc).__gregs[1]) #define MCREG_Sp(mc) ((mc).__gregs[2]) @@ -166,7 +166,7 @@ using asm_sigcontext::_xstate; #define MCREG_T0(mc) ((mc).__gregs[5]) #define MCREG_T1(mc) ((mc).__gregs[6]) #define MCREG_T2(mc) ((mc).__gregs[7]) -#define MCREG_S0(mc) ((mc).__gregs[8]) +#define MCREG_Fp(mc) ((mc).__gregs[8]) #define MCREG_S1(mc) ((mc).__gregs[9]) #define MCREG_A0(mc) ((mc).__gregs[10]) #define MCREG_A1(mc) ((mc).__gregs[11]) @@ -1130,8 +1130,6 @@ inline static DWORD64 CONTEXTGetFP(LPCONTEXT pContext) return pContext->R11; #elif defined(HOST_POWERPC64) return pContext->R31; -#elif defined(HOST_RISCV64) - return pContext->S0; #else return pContext->Fp; 
#endif diff --git a/src/coreclr/pal/src/include/pal/mutex.hpp b/src/coreclr/pal/src/include/pal/mutex.hpp index 464f0f72afb454..db35b9ea08556d 100644 --- a/src/coreclr/pal/src/include/pal/mutex.hpp +++ b/src/coreclr/pal/src/include/pal/mutex.hpp @@ -124,9 +124,11 @@ Miscellaneous // involved. See https://github.com/dotnet/runtime/issues/10519. // - On OSX, pthread robust mutexes were/are not available at the time of this writing. In case they are made available in the // future, their use is disabled for compatibility. +// - On RISCV64 (with QEMU), pthread robust mutexes were/are not available at the time of this writing. In case they are made available in the +// future, their use is disabled for compatibility. #if HAVE_FULLY_FEATURED_PTHREAD_MUTEXES && \ HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES && \ - !(defined(__FreeBSD__) || defined(TARGET_OSX)) + !(defined(__FreeBSD__) || defined(TARGET_OSX) || defined(TARGET_RISCV64)) #define NAMED_MUTEX_USE_PTHREAD_MUTEX 1 #else diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index a17c6c077da3b7..d9a17b95336209 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -199,23 +199,22 @@ typedef int __ptrace_request; #elif defined(HOST_RISCV64) -#error "TODO-RISCV64: review this" +// #error "TODO-RISCV64: review this" // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/2d865a2964fe06bfc569ab00c74e152b582ed764/riscv-cc.adoc #define ASSIGN_CONTROL_REGS \ ASSIGN_REG(Ra) \ ASSIGN_REG(Sp) \ - ASSIGN_REG(Sp) \ - ASSIGN_REG(Gp) \ - ASSIGN_REG(Tp) \ + ASSIGN_REG(Fp) \ ASSIGN_REG(Pc) #define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(Gp) \ + ASSIGN_REG(Tp) \ ASSIGN_REG(T0) \ ASSIGN_REG(T1) \ ASSIGN_REG(T2) \ - ASSIGN_REG(S0) \ ASSIGN_REG(S1) \ ASSIGN_REG(A0) \ ASSIGN_REG(A1) \ @@ -671,7 +670,7 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } #elif defined(HOST_RISCV64) native->uc_mcontext.__fpregs.__d.__fcsr = lpContext->Fcsr; - for (int i = 0; i < 64; i++) + for (int i = 0; i < 32; i++) { native->uc_mcontext.__fpregs.__d.__f[i] = lpContext->F[i]; } @@ -839,7 +838,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex } #elif defined(HOST_RISCV64) lpContext->Fcsr = native->uc_mcontext.__fpregs.__d.__fcsr; - for (int i = 0; i < 64; i++) + for (int i = 0; i < 32; i++) { lpContext->F[i] = native->uc_mcontext.__fpregs.__d.__f[i]; } diff --git a/src/coreclr/scripts/coreclr_arguments.py b/src/coreclr/scripts/coreclr_arguments.py index 400a2da00c2dc5..4bea9477b8391d 100644 --- a/src/coreclr/scripts/coreclr_arguments.py +++ b/src/coreclr/scripts/coreclr_arguments.py @@ -63,7 +63,7 @@ def __init__(self, self.require_built_core_root = require_built_core_root self.require_built_test_dir = require_built_test_dir - self.valid_arches = ["x64", "x86", "arm", "arm64", "loongarch64", "wasm"] + self.valid_arches = ["x64", "x86", "arm", "arm64", "loongarch64", "riscv64", "wasm"] self.valid_build_types = ["Debug", "Checked", "Release"] self.valid_host_os = ["windows", "osx", "linux", "illumos", "solaris", "browser", "android", "wasi"] diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index ff4dd582f3c1bf..676c490753924f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -253,6 +253,8 @@ static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = 
SPMI_TARGET_ARCHITECTUR static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_ARM64; #elif defined(TARGET_LOONGARCH64) static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_LOONGARCH64; +#elif defined(TARGET_RISCV64) +static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_RISCV64; #else #error Unsupported architecture #endif diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h index a97e8fab4b32e6..4b13202f5e2933 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h @@ -54,7 +54,8 @@ enum SPMI_TARGET_ARCHITECTURE SPMI_TARGET_ARCHITECTURE_AMD64, SPMI_TARGET_ARCHITECTURE_ARM64, SPMI_TARGET_ARCHITECTURE_ARM, - SPMI_TARGET_ARCHITECTURE_LOONGARCH64 + SPMI_TARGET_ARCHITECTURE_LOONGARCH64, + SPMI_TARGET_ARCHITECTURE_RISCV64 }; SPMI_TARGET_ARCHITECTURE GetSpmiTargetArchitecture(); @@ -67,7 +68,7 @@ inline bool IsSpmiTarget32Bit() inline bool IsSpmiTarget64Bit() { - return (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_AMD64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_LOONGARCH64); + return (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_AMD64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_LOONGARCH64) || (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_RISCV64); } inline size_t SpmiTargetPointerSize() diff --git a/src/coreclr/unwinder/riscv64/unwinder.cpp b/src/coreclr/unwinder/riscv64/unwinder.cpp index bec3a8da31b44d..53093f7e135880 100644 --- a/src/coreclr/unwinder/riscv64/unwinder.cpp +++ b/src/coreclr/unwinder/riscv64/unwinder.cpp @@ -9,4 +9,1466 @@ #include "unwinder.h" -#error "TODO-RISCV64: missing implementation" +typedef struct _RISCV64_KTRAP_FRAME { + +// +// Exception active indicator. +// +// 0 - interrupt frame. +// 1 - exception frame. +// 2 - service frame. +// + + /* +0x000 */ UCHAR ExceptionActive; // always valid + /* +0x001 */ UCHAR ContextFromKFramesUnwound; // set if KeContextFromKFrames created this frame + /* +0x002 */ UCHAR DebugRegistersValid; // always valid + /* +0x003 */ union { + UCHAR PreviousMode; // system services only + UCHAR PreviousIrql; // interrupts only + }; + +// +// Page fault information (page faults only) +// Previous trap frame address (system services only) +// +// Organized this way to allow first couple words to be used +// for scratch space in the general case +// + + /* +0x004 */ ULONG FaultStatus; // page faults only + /* +0x008 */ union { + ULONG64 FaultAddress; // page faults only + ULONG64 TrapFrame; // system services only + }; + +// +// The LOONGARCH architecture does not have an architectural trap frame. On +// an exception or interrupt, the processor switches to an +// exception-specific processor mode in which at least the RA and SP +// registers are banked. Software is responsible for preserving +// registers which reflect the processor state in which the +// exception occurred rather than any intermediate processor modes. +// + +// +// Volatile floating point state is dynamically allocated; this +// pointer may be NULL if the FPU was not enabled at the time the +// trap was taken. 
+// + + /* +0x010 */ PVOID VfpState; + +// +// Volatile registers +// + ULONG64 R[15]; + ULONG64 Gp; + ULONG64 Tp; + ULONG64 Sp; + ULONG64 Fp; + ULONG64 Ra; + ULONG64 Pc; + +} RISCV64_KTRAP_FRAME, *PRISCV64_KTRAP_FRAME; + +typedef struct _RISCV64_VFP_STATE +{ + struct _RISCV64_VFP_STATE *Link; // link to next state entry + ULONG Fcsr; // FCSR register + ULONG64 F[32]; // All F registers (0-31) +} RISCV64_VFP_STATE, *PRISCV64_VFP_STATE, KRISCV64_VFP_STATE, *PKRISCV64_VFP_STATE; + +// +// Parameters describing the unwind codes. +// + +#define STATUS_UNWIND_UNSUPPORTED_VERSION STATUS_UNSUCCESSFUL +#define STATUS_UNWIND_NOT_IN_FUNCTION STATUS_UNSUCCESSFUL +#define STATUS_UNWIND_INVALID_SEQUENCE STATUS_UNSUCCESSFUL + +// +// Macros for accessing memory. These can be overridden if other code +// (in particular the debugger) needs to use them. + +#define MEMORY_READ_BYTE(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_DWORD(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_QWORD(params, addr) (*dac_cast(addr)) + +typedef struct _RISCV64_UNWIND_PARAMS +{ + PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers; +} RISCV64_UNWIND_PARAMS, *PRISCV64_UNWIND_PARAMS; + + +#define UNWIND_PARAMS_SET_TRAP_FRAME(Params, Address, Size) +#define UPDATE_CONTEXT_POINTERS(Params, RegisterNumber, Address) \ +do { \ + if (ARGUMENT_PRESENT(Params)) { \ + PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers = (Params)->ContextPointers; \ + if (ARGUMENT_PRESENT(ContextPointers)) { \ + if (RegisterNumber == 8) \ + ContextPointers->Fp = (PDWORD64)Address; \ + else if (RegisterNumber == 9) \ + ContextPointers->S1 = (PDWORD64)Address; \ + else if (RegisterNumber >= 18 && RegisterNumber <= 27) \ + (&ContextPointers->S2)[RegisterNumber - 18] = (PDWORD64)Address; \ + } \ + } \ +} while (0) + + +#define UPDATE_FP_CONTEXT_POINTERS(Params, RegisterNumber, Address) \ +do { \ + if (ARGUMENT_PRESENT(Params)) { \ + PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers = (Params)->ContextPointers; \ + if (ARGUMENT_PRESENT(ContextPointers)) { \ + if (RegisterNumber == 8) \ + ContextPointers->F8 = (PDWORD64)Address; \ + else if (RegisterNumber == 9) \ + ContextPointers->F9 = (PDWORD64)Address; \ + else if (RegisterNumber >= 18 && RegisterNumber <= 27) \ + (&ContextPointers->F18)[RegisterNumber - 18] = (PDWORD64)Address; \ + } \ + } \ +} while (0) + +#define VALIDATE_STACK_ADDRESS_EX(Params, Context, Address, DataSize, Alignment, OutStatus) +#define VALIDATE_STACK_ADDRESS(Params, Context, DataSize, Alignment, OutStatus) + +// +// Macros to clarify opcode parsing +// + +#define OPCODE_IS_END(Op) (((Op) & 0xfe) == 0xe4) + +// +// This table describes the size of each unwind code, in bytes +// + +static const BYTE UnwindCodeSizeTable[256] = +{ + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2, 3,2,2,2,3,2,2,2, 3,2,2,2,2,2,3,2, 3,2,3,2,3,2,2,2, + 4,1,3,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1 +}; + +NTSTATUS +RtlpUnwindCustom( + __inout PT_CONTEXT ContextRecord, + _In_ BYTE Opcode, + _In_ PRISCV64_UNWIND_PARAMS UnwindParams + ) + +/*++ + +Routine Description: + + Handles custom unwinding operations involving machine-specific + frames. 
+ +Arguments: + + ContextRecord - Supplies the address of a context record. + + Opcode - The opcode to decode. + + UnwindParams - Additional parameters shared with caller. + +Return Value: + + An NTSTATUS indicating either STATUS_SUCCESS if everything went ok, or + another status code if there were problems. + +--*/ + +{ + ULONG Fcsr; + ULONG RegIndex; + ULONG_PTR SourceAddress; + ULONG_PTR StartingSp; + NTSTATUS Status; + ULONG_PTR VfpStateAddress; + + StartingSp = ContextRecord->Sp; + Status = STATUS_SUCCESS; + + // + // The opcode describes the special-case stack + // + + switch (Opcode) + { + + // + // Trap frame case + // + + case 0xe8: // MSFT_OP_TRAP_FRAME: + + // + // Ensure there is enough valid space for the trap frame + // + + VALIDATE_STACK_ADDRESS(UnwindParams, ContextRecord, sizeof(RISCV64_KTRAP_FRAME), 16, &Status); + if (!NT_SUCCESS(Status)) { + return Status; + } + + // + // Restore R0-R14, and F0-F32 + // + SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, R); + for (RegIndex = 0; RegIndex < 15; RegIndex++) { + UPDATE_CONTEXT_POINTERS(UnwindParams, RegIndex, SourceAddress); +#ifdef __GNUC__ + *(&ContextRecord->R0 + RegIndex) = MEMORY_READ_QWORD(UnwindParams, SourceAddress); +#else + ContextRecord->R[RegIndex] = MEMORY_READ_QWORD(UnwindParams, SourceAddress); +#endif + SourceAddress += sizeof(ULONG_PTR); + } + + SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, VfpState); + VfpStateAddress = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + if (VfpStateAddress != 0) { + + SourceAddress = VfpStateAddress + offsetof(KRISCV64_VFP_STATE, Fcsr); + Fcsr = MEMORY_READ_DWORD(UnwindParams, SourceAddress); + if (Fcsr != (ULONG)-1) { + + ContextRecord->Fcsr = Fcsr; + + SourceAddress = VfpStateAddress + offsetof(KRISCV64_VFP_STATE, F); + for (RegIndex = 0; RegIndex < 32; RegIndex++) { + UPDATE_FP_CONTEXT_POINTERS(UnwindParams, RegIndex, SourceAddress); + ContextRecord->F[RegIndex] = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + SourceAddress += 2 * sizeof(ULONGLONG); + } + } + } + + // + // Restore SP, RA, PC, and the status registers + // + + //SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, Tp);//TP + //ContextRecord->Tp = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, Sp); + ContextRecord->Sp = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, Fp); + ContextRecord->Fp = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, Ra); + ContextRecord->Ra = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(RISCV64_KTRAP_FRAME, Pc); + ContextRecord->Pc = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + // + // Set the trap frame and clear the unwound-to-call flag + // + + UNWIND_PARAMS_SET_TRAP_FRAME(UnwindParams, StartingSp, sizeof(RISCV64_KTRAP_FRAME)); + ContextRecord->ContextFlags &= ~CONTEXT_UNWOUND_TO_CALL; + break; + + // + // Context case + // + + case 0xea: // MSFT_OP_CONTEXT: + + // + // Ensure there is enough valid space for the full CONTEXT structure + // + + VALIDATE_STACK_ADDRESS(UnwindParams, ContextRecord, sizeof(CONTEXT), 16, &Status); + if (!NT_SUCCESS(Status)) { + return Status; + } + + // + // Restore R0-R23, and F0-F31 + // + + SourceAddress = StartingSp + offsetof(T_CONTEXT, R0); + for (RegIndex = 0; RegIndex < 23; RegIndex++) { + UPDATE_CONTEXT_POINTERS(UnwindParams, RegIndex, SourceAddress); +#ifdef __GNUC__ + 
*(&ContextRecord->R0 + RegIndex) = MEMORY_READ_QWORD(UnwindParams, SourceAddress); +#else + ContextRecord->R[RegIndex] = MEMORY_READ_QWORD(UnwindParams, SourceAddress); +#endif + SourceAddress += sizeof(ULONG_PTR); + } + + SourceAddress = StartingSp + offsetof(T_CONTEXT, F); + for (RegIndex = 0; RegIndex < 32; RegIndex++) { + UPDATE_FP_CONTEXT_POINTERS(UnwindParams, RegIndex, SourceAddress); + ContextRecord->F[RegIndex] = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + SourceAddress += 2 * sizeof(ULONGLONG); + } + + // + // Restore SP, RA, PC, and the status registers + // + + SourceAddress = StartingSp + offsetof(T_CONTEXT, Fp); + ContextRecord->Fp = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(T_CONTEXT, Sp); + ContextRecord->Sp = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(T_CONTEXT, Pc); + ContextRecord->Pc = MEMORY_READ_QWORD(UnwindParams, SourceAddress); + + SourceAddress = StartingSp + offsetof(T_CONTEXT, Fcsr); + ContextRecord->Fcsr = MEMORY_READ_DWORD(UnwindParams, SourceAddress); + + // + // Inherit the unwound-to-call flag from this context + // + + SourceAddress = StartingSp + offsetof(T_CONTEXT, ContextFlags); + ContextRecord->ContextFlags &= ~CONTEXT_UNWOUND_TO_CALL; + ContextRecord->ContextFlags |= + MEMORY_READ_DWORD(UnwindParams, SourceAddress) & CONTEXT_UNWOUND_TO_CALL; + break; + + default: + return STATUS_UNSUCCESSFUL; + } + + return STATUS_SUCCESS; +} + +ULONG +RtlpComputeScopeSize( + _In_ ULONG_PTR UnwindCodePtr, + _In_ ULONG_PTR UnwindCodesEndPtr, + _In_ BOOLEAN IsEpilog, + _In_ PRISCV64_UNWIND_PARAMS UnwindParams + ) + +/*++ + +Routine Description: + + Computes the size of an prolog or epilog, in words. + +Arguments: + + UnwindCodePtr - Supplies a pointer to the start of the unwind + code sequence. + + UnwindCodesEndPtr - Supplies a pointer to the byte immediately + following the unwind code table, as described by the header. + + IsEpilog - Specifies TRUE if the scope describes an epilog, + or FALSE if it describes a prolog. + + UnwindParams - Additional parameters shared with caller. + +Return Value: + + The size of the scope described by the unwind codes, in halfword units. + +--*/ + +{ + ULONG ScopeSize; + BYTE Opcode; + + // + // Iterate through the unwind codes until we hit an end marker. + // While iterating, accumulate the total scope size. + // + + ScopeSize = 0; + Opcode = 0; + while (UnwindCodePtr < UnwindCodesEndPtr) { + Opcode = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + if (OPCODE_IS_END(Opcode)) { + break; + } + + UnwindCodePtr += UnwindCodeSizeTable[Opcode]; + ScopeSize++; + } + + // + // Epilogs have one extra instruction at the end that needs to be + // accounted for. + // + + if (IsEpilog) { + ScopeSize++; + } + + return ScopeSize; +} + +NTSTATUS +RtlpUnwindRestoreRegisterRange( + __inout PT_CONTEXT ContextRecord, + _In_ LONG SpOffset, + _In_ ULONG FirstRegister, + _In_ ULONG RegisterCount, + _In_ PRISCV64_UNWIND_PARAMS UnwindParams + ) + +/*++ + +Routine Description: + + Restores a series of integer registers from the stack. + +Arguments: + + ContextRecord - Supplies the address of a context record. + + SpOffset - Specifies a stack offset. Positive values are simply used + as a base offset. Negative values assume a predecrement behavior: + a 0 offset is used for restoration, but the absolute value of the + offset is added to the final Sp. + + FirstRegister - Specifies the index of the first register to restore. 
+ + RegisterCount - Specifies the number of registers to restore. + + UnwindParams - Additional parameters shared with caller. + +Return Value: + + None. + +--*/ + +{ + ULONG_PTR CurAddress; + ULONG RegIndex; + NTSTATUS Status; + + // + // Compute the source address and validate it. + // + + CurAddress = ContextRecord->Sp; + if (SpOffset >= 0) { + CurAddress += SpOffset; + } + + Status = STATUS_SUCCESS; + VALIDATE_STACK_ADDRESS(UnwindParams, ContextRecord, 8 * RegisterCount, 8, &Status); + if (Status != STATUS_SUCCESS) { + return Status; + } + + // + // Restore the registers + // + for (RegIndex = 0; RegIndex < RegisterCount; RegIndex++) { + UPDATE_CONTEXT_POINTERS(UnwindParams, FirstRegister + RegIndex, CurAddress); +#ifdef __GNUC__ + *(&ContextRecord->R0 + FirstRegister + RegIndex) = MEMORY_READ_QWORD(UnwindParams, CurAddress); +#else + ContextRecord->R[FirstRegister + RegIndex] = MEMORY_READ_QWORD(UnwindParams, CurAddress); +#endif + CurAddress += 8; + } + if (SpOffset < 0) { + ContextRecord->Sp -= SpOffset; + } + + return STATUS_SUCCESS; +} + +NTSTATUS +RtlpUnwindRestoreFpRegisterRange( + __inout PT_CONTEXT ContextRecord, + _In_ LONG SpOffset, + _In_ ULONG FirstRegister, + _In_ ULONG RegisterCount, + _In_ PRISCV64_UNWIND_PARAMS UnwindParams + ) + +/*++ + +Routine Description: + + Restores a series of floating-point registers from the stack. + +Arguments: + + ContextRecord - Supplies the address of a context record. + + SpOffset - Specifies a stack offset. Positive values are simply used + as a base offset. Negative values assume a predecrement behavior: + a 0 offset is used for restoration, but the absolute value of the + offset is added to the final Sp. + + FirstRegister - Specifies the index of the first register to restore. + + RegisterCount - Specifies the number of registers to restore. + + UnwindParams - Additional parameters shared with caller. + +Return Value: + + None. + +--*/ + +{ + ULONG_PTR CurAddress; + ULONG RegIndex; + NTSTATUS Status; + + // + // Compute the source address and validate it. + // + + CurAddress = ContextRecord->Sp; + if (SpOffset >= 0) { + CurAddress += SpOffset; + } + + Status = STATUS_SUCCESS; + VALIDATE_STACK_ADDRESS(UnwindParams, ContextRecord, 8 * RegisterCount, 8, &Status); + if (Status != STATUS_SUCCESS) { + return Status; + } + + // + // Restore the registers + // + + for (RegIndex = 0; RegIndex < RegisterCount; RegIndex++) { + UPDATE_FP_CONTEXT_POINTERS(UnwindParams, FirstRegister + RegIndex, CurAddress); + ContextRecord->F[FirstRegister + RegIndex] = MEMORY_READ_QWORD(UnwindParams, CurAddress); + CurAddress += 8; + } + if (SpOffset < 0) { + ContextRecord->Sp -= SpOffset; + } + + return STATUS_SUCCESS; +} + +NTSTATUS +RtlpUnwindFunctionFull( + _In_ DWORD64 ControlPcRva, + _In_ ULONG_PTR ImageBase, + _In_ PT_RUNTIME_FUNCTION FunctionEntry, + __inout T_CONTEXT *ContextRecord, + _Out_ PDWORD64 EstablisherFrame, + __deref_opt_out_opt PEXCEPTION_ROUTINE *HandlerRoutine, + _Out_ PVOID *HandlerData, + _In_ PRISCV64_UNWIND_PARAMS UnwindParams + ) + +/*++ + +Routine Description: + + This function virtually unwinds the specified function by parsing the + .xdata record to determine where in the function the provided ControlPc + is, and then executing unwind codes that map to the function's prolog + or epilog behavior. + + If a context pointers record is specified (in the UnwindParams), then + the address where each nonvolatile register is restored from is recorded + in the appropriate element of the context pointers record. 
+ +Arguments: + + ControlPcRva - Supplies the address where control left the specified + function, as an offset relative to the ImageBase. + + ImageBase - Supplies the base address of the image that contains the + function being unwound. + + FunctionEntry - Supplies the address of the function table entry for the + specified function. If appropriate, this should have already been + probed. + + ContextRecord - Supplies the address of a context record. + + EstablisherFrame - Supplies a pointer to a variable that receives the + the establisher frame pointer value. + + HandlerRoutine - Supplies an optional pointer to a variable that receives + the handler routine address. If control did not leave the specified + function in either the prolog or an epilog and a handler of the + proper type is associated with the function, then the address of the + language specific exception handler is returned. Otherwise, NULL is + returned. + + HandlerData - Supplies a pointer to a variable that receives a pointer + the language handler data. + + UnwindParams - Additional parameters shared with caller. + +Return Value: + + STATUS_SUCCESS if the unwind could be completed, a failure status otherwise. + Unwind can only fail when validation bounds are specified. + +--*/ + +{ + ULONG AccumulatedSaveNexts; + ULONG CurCode; + ULONG EpilogScopeCount; + PEXCEPTION_ROUTINE ExceptionHandler; + PVOID ExceptionHandlerData; + BOOLEAN FinalPcFromRa; + ULONG FunctionLength; + ULONG HeaderWord; + ULONG NextCode, NextCode1, NextCode2; + DWORD64 OffsetInFunction; + ULONG ScopeNum; + ULONG ScopeSize; + ULONG ScopeStart; + DWORD64 SkipWords; + NTSTATUS Status; + ULONG_PTR UnwindCodePtr; + ULONG_PTR UnwindCodesEndPtr; + ULONG_PTR UnwindDataPtr; + ULONG UnwindIndex; + ULONG UnwindWords; + + // + // Unless a special frame is encountered, assume that any unwinding + // will return us to the return address of a call and set the flag + // appropriately (it will be cleared again if the special cases apply). + // + + ContextRecord->ContextFlags |= CONTEXT_UNWOUND_TO_CALL; + + // + // By default, unwinding is done by popping to the RA, then copying + // that RA to the PC. However, some special opcodes require different + // behavior. + // + + FinalPcFromRa = TRUE; + + // + // Fetch the header word from the .xdata blob + // + + UnwindDataPtr = ImageBase + FunctionEntry->UnwindData; + HeaderWord = MEMORY_READ_DWORD(UnwindParams, UnwindDataPtr); + UnwindDataPtr += 4; + + // + // Verify the version before we do anything else + // + + if (((HeaderWord >> 18) & 3) != 0) { + assert(!"ShouldNotReachHere"); + return STATUS_UNWIND_UNSUPPORTED_VERSION; + } + + FunctionLength = HeaderWord & 0x3ffff; + OffsetInFunction = (ControlPcRva - FunctionEntry->BeginAddress) / 4; + + // + // Determine the number of epilog scope records and the maximum number + // of unwind codes. + // + + UnwindWords = (HeaderWord >> 27) & 31; + EpilogScopeCount = (HeaderWord >> 22) & 31; + if (EpilogScopeCount == 0 && UnwindWords == 0) { + EpilogScopeCount = MEMORY_READ_DWORD(UnwindParams, UnwindDataPtr); + UnwindDataPtr += 4; + UnwindWords = (EpilogScopeCount >> 16) & 0xff; + EpilogScopeCount &= 0xffff; + } + if ((HeaderWord & (1 << 21)) != 0) { + UnwindIndex = EpilogScopeCount; + EpilogScopeCount = 0; + } + + // + // If exception data is present, extract it now. 
+ // + + ExceptionHandler = NULL; + ExceptionHandlerData = NULL; + if ((HeaderWord & (1 << 20)) != 0) { + ExceptionHandler = (PEXCEPTION_ROUTINE)(ImageBase + + MEMORY_READ_DWORD(UnwindParams, UnwindDataPtr + 4 * (EpilogScopeCount + UnwindWords))); + ExceptionHandlerData = (PVOID)(UnwindDataPtr + 4 * (EpilogScopeCount + UnwindWords + 1)); + } + + // + // Unless we are in a prolog/epilog, we execute the unwind codes + // that immediately follow the epilog scope list. + // + + UnwindCodePtr = UnwindDataPtr + 4 * EpilogScopeCount; + UnwindCodesEndPtr = UnwindCodePtr + 4 * UnwindWords; + SkipWords = 0; + + // + // If we're near the start of the function, and this function has a prolog, + // compute the size of the prolog from the unwind codes. If we're in the + // midst of it, we still execute starting at unwind code index 0, but we may + // need to skip some to account for partial execution of the prolog. + // + // N.B. As an optimization here, note that each byte of unwind codes can + // describe at most one 32-bit instruction. Thus, the largest prologue + // that could possibly be described by UnwindWords (which is 4 * the + // number of unwind code bytes) is 4 * UnwindWords words. If + // OffsetInFunction is larger than this value, it is guaranteed to be + // in the body of the function. + // + + if (OffsetInFunction < 4 * UnwindWords) { + ScopeSize = RtlpComputeScopeSize(UnwindCodePtr, UnwindCodesEndPtr, FALSE, UnwindParams); + + if (OffsetInFunction < ScopeSize) { + SkipWords = ScopeSize - OffsetInFunction; + ExceptionHandler = NULL; + ExceptionHandlerData = NULL; + goto ExecuteCodes; + } + } + + // + // We're not in the prolog, now check to see if we are in the epilog. + // In the simple case, the 'E' bit is set indicating there is a single + // epilog that lives at the end of the function. If we're near the end + // of the function, compute the actual size of the epilog from the + // unwind codes. If we're in the midst of it, adjust the unwind code + // pointer to the start of the codes and determine how many we need to skip. + // + // N.B. Similar to the prolog case above, the maximum number of halfwords + // that an epilog can cover is limited by UnwindWords. In the epilog + // case, however, the starting index within the unwind code table is + // non-zero, and so the maximum number of unwind codes that can pertain + // to an epilog is (UnwindWords * 4 - UnwindIndex), thus further + // constraining the bounds of the epilog. + // + + if ((HeaderWord & (1 << 21)) != 0) { + if (OffsetInFunction + (4 * UnwindWords - UnwindIndex) >= FunctionLength) { + ScopeSize = RtlpComputeScopeSize(UnwindCodePtr + UnwindIndex, UnwindCodesEndPtr, TRUE, UnwindParams); + ScopeStart = FunctionLength - ScopeSize; + + if (OffsetInFunction >= ScopeStart) { + UnwindCodePtr += UnwindIndex; + SkipWords = OffsetInFunction - ScopeStart; + ExceptionHandler = NULL; + ExceptionHandlerData = NULL; + } + } + } + + // + // In the multiple-epilog case, we scan forward to see if we are within + // shooting distance of any of the epilogs. If we are, we compute the + // actual size of the epilog from the unwind codes and proceed like the + // simple case above. + // + + else { + for (ScopeNum = 0; ScopeNum < EpilogScopeCount; ScopeNum++) { + HeaderWord = MEMORY_READ_DWORD(UnwindParams, UnwindDataPtr); + UnwindDataPtr += 4; + + // + // The scope records are stored in order. If we hit a record that + // starts after our current position, we must not be in an epilog. 
+ // + + ScopeStart = HeaderWord & 0x3ffff; + if (OffsetInFunction < ScopeStart) { + break; + } + + UnwindIndex = HeaderWord >> 22; + if (OffsetInFunction < ScopeStart + (4 * UnwindWords - UnwindIndex)) { + ScopeSize = RtlpComputeScopeSize(UnwindCodePtr + UnwindIndex, UnwindCodesEndPtr, TRUE, UnwindParams); + + if (OffsetInFunction < ScopeStart + ScopeSize) { + + UnwindCodePtr += UnwindIndex; + SkipWords = OffsetInFunction - ScopeStart; + ExceptionHandler = NULL; + ExceptionHandlerData = NULL; + break; + } + } + } + } + +ExecuteCodes: + + // + // Skip over unwind codes until we account for the number of halfwords + // to skip. + // + + while (UnwindCodePtr < UnwindCodesEndPtr && SkipWords > 0) { + CurCode = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + if (OPCODE_IS_END(CurCode)) { + break; + } + UnwindCodePtr += UnwindCodeSizeTable[CurCode]; + SkipWords--; + } + + // + // Now execute codes until we hit the end. + // + + Status = STATUS_SUCCESS; + AccumulatedSaveNexts = 0; + while (UnwindCodePtr < UnwindCodesEndPtr && Status == STATUS_SUCCESS) { + + CurCode = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr += 1; + + // + // alloc_s (000xxxxx): allocate small stack with size < 1024 (2^5 * 16) + // + + if (CurCode <= 0x1f) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + ContextRecord->Sp += 16 * (CurCode & 0x1f); + } + + // + // alloc_m (11000xxx|xxxxxxxx): allocate large stack with size < 32k (2^11 * 16). + // + + else if (CurCode <= 0xc7) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + ContextRecord->Sp += 16 * ((CurCode & 7) << 8); + ContextRecord->Sp += 16 * MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + } + + // + // save_reg (11010000|000xxxxx|zzzzzzzz): save reg r(1+#X) at [sp+#Z*8], offset <= 2047 + // + + else if (CurCode == 0xd0) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + NextCode = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + NextCode1 = (uint8_t)MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + Status = RtlpUnwindRestoreRegisterRange( + ContextRecord, + 8 * NextCode1, + 1 + NextCode, + 1, + UnwindParams); + } + + // + // save_freg (11011100|0xxxzzzz|zzzzzzzz): save reg f(24+#X) at [sp+#Z*8], offset <= 32767 + // + + else if (CurCode == 0xdc) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + NextCode = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + NextCode1 = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + Status = RtlpUnwindRestoreFpRegisterRange( + ContextRecord, + 8 * (((NextCode & 0xf) << 8) + NextCode1), + 24 + (NextCode >> 4), + 1, + UnwindParams); + } + + // + // alloc_l (11100000|xxxxxxxx|xxxxxxxx|xxxxxxxx): allocate large stack with size < 256M + // + + else if (CurCode == 0xe0) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + ContextRecord->Sp += 16 * (MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr) << 16); + UnwindCodePtr++; + ContextRecord->Sp += 16 * (MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr) << 8); + UnwindCodePtr++; + ContextRecord->Sp += 16 * MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + } + + // + // set_fp (11100001): set up fp: with: ori fp,sp,0 + // + + else if (CurCode == 0xe1) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + ContextRecord->Sp = ContextRecord->Fp; + } + + // + // add_fp 
(11100010|000xxxxx|xxxxxxxx): set up fp with: addi.d fp,sp,#x*8 + // + + else if (CurCode == 0xe2) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + NextCode = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + NextCode1 = MEMORY_READ_BYTE(UnwindParams, UnwindCodePtr); + UnwindCodePtr++; + ContextRecord->Sp = ContextRecord->Fp - 8 * ((NextCode << 8) | NextCode1); + } + + // + // nop (11100011): no unwind operation is required + // + + else if (CurCode == 0xe3) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + } + + // + // end (11100100): end of unwind code + // + + else if (CurCode == 0xe4) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + goto finished; + } + + // + // end_c (11100101): end of unwind code in current chained scope + // + + else if (CurCode == 0xe5) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + goto finished; + } + + // + // custom_0 (111010xx): restore custom structure + // + + else if (CurCode >= 0xe8 && CurCode <= 0xeb) { + if (AccumulatedSaveNexts != 0) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + Status = RtlpUnwindCustom(ContextRecord, (BYTE) CurCode, UnwindParams); + FinalPcFromRa = FALSE; + } + + // + // Anything else is invalid + // + + else { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + } + + // + // If we succeeded, post-process the results a bit + // +finished: + if (Status == STATUS_SUCCESS) { + + // + // Since we always POP to the RA, recover the final PC from there, unless + // it was overwritten due to a special case custom unwinding operation. + // Also set the establisher frame equal to the final stack pointer. + // + + if (FinalPcFromRa) { + ContextRecord->Pc = ContextRecord->Ra; + } + + *EstablisherFrame = ContextRecord->Sp; + + if (ARGUMENT_PRESENT(HandlerRoutine)) { + *HandlerRoutine = ExceptionHandler; + } + *HandlerData = ExceptionHandlerData; + } + + return Status; +} + +NTSTATUS +RtlpUnwindFunctionCompact( + _In_ DWORD64 ControlPcRva, + _In_ PT_RUNTIME_FUNCTION FunctionEntry, + __inout T_CONTEXT *ContextRecord, + _Out_ PDWORD64 EstablisherFrame, + __deref_opt_out_opt PEXCEPTION_ROUTINE *HandlerRoutine, + _Out_ PVOID *HandlerData, + _In_ PRISCV64_UNWIND_PARAMS UnwindParams + ) + +/*++ + +Routine Description: + + This function virtually unwinds the specified function by parsing the + compact .pdata record to determine where in the function the provided + ControlPc is, and then executing a standard, well-defined set of + operations. + + If a context pointers record is specified (in the UnwindParams), then + the address where each nonvolatile register is restored from is recorded + in the appropriate element of the context pointers record. + +Arguments: + + ControlPcRva - Supplies the address where control left the specified + function, as an offset relative to the ImageBase. + + FunctionEntry - Supplies the address of the function table entry for the + specified function. If appropriate, this should have already been + probed. + + ContextRecord - Supplies the address of a context record. + + EstablisherFrame - Supplies a pointer to a variable that receives the + the establisher frame pointer value. + + HandlerRoutine - Supplies an optional pointer to a variable that receives + the handler routine address. 
If control did not leave the specified + function in either the prolog or an epilog and a handler of the + proper type is associated with the function, then the address of the + language specific exception handler is returned. Otherwise, NULL is + returned. + + HandlerData - Supplies a pointer to a variable that receives a pointer + the language handler data. + + UnwindParams - Additional parameters shared with caller. + +Return Value: + + STATUS_SUCCESS if the unwind could be completed, a failure status otherwise. + Unwind can only fail when validation bounds are specified. + +--*/ + +{ + ULONG Count; + ULONG Cr; + ULONG CurrentOffset; + ULONG EpilogLength; + ULONG Flag; + ULONG FloatSize; + ULONG FrameSize; + ULONG FRegOpcodes; + ULONG FunctionLength; + ULONG HBit; + ULONG HOpcodes; + ULONG IRegOpcodes; + ULONG IntSize; + ULONG LocalSize; + DWORD64 OffsetInFunction; + DWORD64 OffsetInScope; + ULONG PrologLength; + ULONG RegF; + ULONG RegI; + ULONG RegSize; + ULONG ScopeStart; + ULONG StackAdjustOpcodes; + NTSTATUS Status; + ULONG UnwindData; + + UnwindData = FunctionEntry->UnwindData; + Status = STATUS_SUCCESS; + + // + // Compact records always describe an unwind to a call. + // + + ContextRecord->ContextFlags |= CONTEXT_UNWOUND_TO_CALL; + + // + // Extract the basic information about how to do a full unwind. + // + + Flag = UnwindData & 3; + FunctionLength = (UnwindData >> 2) & 0x7ff; + RegF = (UnwindData >> 13) & 7; + RegI = (UnwindData >> 16) & 0xf; + HBit = (UnwindData >> 20) & 1; + Cr = (UnwindData >> 21) & 3; + FrameSize = (UnwindData >> 23) & 0x1ff; + + assert(!"---------------RISCV64 ShouldNotReachHere"); + if (Flag == 3) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + if (Cr == 2) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + + // + // Determine the size of the locals + // + + IntSize = RegI * 8; + if (Cr == 1) { + IntSize += 8; + } + FloatSize = (RegF == 0) ? 0 : (RegF + 1) * 8; + RegSize = (IntSize + FloatSize + 8*8 * HBit + 0xf) & ~0xf; + if (RegSize > 16 * FrameSize) { + return STATUS_UNWIND_INVALID_SEQUENCE; + } + LocalSize = 16 * FrameSize - RegSize; + + // + // If we're near the start of the function (within 17 words), + // see if we are within the prolog. + // + // N.B. If the low 2 bits of the UnwindData are 2, then we have + // no prolog. + // + + OffsetInFunction = (ControlPcRva - FunctionEntry->BeginAddress) / 4; + OffsetInScope = 0; + if (OffsetInFunction < 17 && Flag != 2) { + + // + // Compute sizes for each opcode in the prolog. + // + + IRegOpcodes = (IntSize + 8) / 16; + FRegOpcodes = (FloatSize + 8) / 16; + HOpcodes = 4 * HBit; + StackAdjustOpcodes = (Cr == 3) ? 1 : 0; + if (Cr != 3 || LocalSize > 512) { + StackAdjustOpcodes += (LocalSize > 4088) ? 2 : (LocalSize > 0) ? 1 : 0; + } + + // + // Compute the total prolog length and determine if we are within + // its scope. + // + // N.B. We must execute prolog operations backwards to unwind, so + // our final scope offset in this case is the distance from the end. + // + + PrologLength = IRegOpcodes + FRegOpcodes + HOpcodes + StackAdjustOpcodes; + + if (OffsetInFunction < PrologLength) { + OffsetInScope = PrologLength - OffsetInFunction; + } + } + + // + // If we're near the end of the function (within 15 words), see if + // we are within the epilog. + // + // N.B. If the low 2 bits of the UnwindData are 2, then we have + // no epilog. + // + + if (OffsetInScope == 0 && OffsetInFunction + 15 >= FunctionLength && Flag != 2) { + + // + // Compute sizes for each opcode in the epilog. 
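+        // Each epilog opcode covers at most one register pair (16 bytes), so the
+        // integer and FP save areas are rounded up to pairs below; the final +1 in
+        // EpilogLength accounts for the return instruction itself.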
+ // + + IRegOpcodes = (IntSize + 8) / 16; + FRegOpcodes = (FloatSize + 8) / 16; + HOpcodes = HBit; + StackAdjustOpcodes = (Cr == 3) ? 1 : 0; + if (Cr != 3 || LocalSize > 512) { + StackAdjustOpcodes += (LocalSize > 4088) ? 2 : (LocalSize > 0) ? 1 : 0; + } + + // + // Compute the total epilog length and determine if we are within + // its scope. + // + + EpilogLength = IRegOpcodes + FRegOpcodes + HOpcodes + StackAdjustOpcodes + 1; + + ScopeStart = FunctionLength - EpilogLength; + if (OffsetInFunction > ScopeStart) { + OffsetInScope = OffsetInFunction - ScopeStart; + } + } + + // + // Process operations backwards, in the order: stack/frame deallocation, + // VFP register popping, integer register popping, parameter home + // area recovery. + // + // First case is simple: we process everything with no regard for + // the current offset within the scope. + // + + Status = STATUS_SUCCESS; + if (OffsetInScope == 0) { + + if (Cr == 3) { + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, 0, 22, 1, UnwindParams);///fp + assert(Status == STATUS_SUCCESS); + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, 0, 1, 1, UnwindParams);//ra + } + ContextRecord->Sp += LocalSize; + + if (RegF != 0 && Status == STATUS_SUCCESS) { + Status = RtlpUnwindRestoreFpRegisterRange(ContextRecord, IntSize, 24, RegF + 1, UnwindParams);//fs0 + } + + if (Cr == 1 && Status == STATUS_SUCCESS) { + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, IntSize - 8, 1, 1, UnwindParams);//ra + } + if (RegI > 0 && Status == STATUS_SUCCESS) { + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, 0, 23, RegI, UnwindParams);//s0 + } + ContextRecord->Sp += RegSize; + } + + // + // Second case is more complex: we must step along each operation + // to ensure it should be executed. + // + + else { + + CurrentOffset = 0; + if (Cr == 3) { + if (LocalSize <= 512) { + if (CurrentOffset++ >= OffsetInScope) { + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, -(LONG)LocalSize, 22, 1, UnwindParams); + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, -(LONG)LocalSize, 1, 1, UnwindParams); + } + LocalSize = 0; + } + } + while (LocalSize != 0) { + Count = (LocalSize + 4087) % 4088 + 1; + if (CurrentOffset++ >= OffsetInScope) { + ContextRecord->Sp += Count; + } + LocalSize -= Count; + } + + if (HBit != 0) { + CurrentOffset += 4; + } + + if (RegF != 0 && Status == STATUS_SUCCESS) { + RegF++; + while (RegF != 0) { + Count = 2 - (RegF & 1); + RegF -= Count; + if (CurrentOffset++ >= OffsetInScope) { + Status = RtlpUnwindRestoreFpRegisterRange( + ContextRecord, + (RegF == 0 && RegI == 0) ? (-(LONG)RegSize) : (IntSize + 8 * RegF), + 24 + RegF, + Count, + UnwindParams); + } + } + } + + if (Cr == 1 && Status == STATUS_SUCCESS) { + if (RegI % 2 == 0) { + if (CurrentOffset++ >= OffsetInScope) { + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, IntSize - 8, 31, 1, UnwindParams);//s8 ? + } + } else { + if (CurrentOffset++ >= OffsetInScope) { + RegI--; + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, IntSize - 8, 2, 1, UnwindParams);//tp ? + if (Status == STATUS_SUCCESS) { + Status = RtlpUnwindRestoreRegisterRange(ContextRecord, IntSize - 16, 23 + RegI, 1, UnwindParams); + } + } + } + } + + while (RegI != 0 && Status == STATUS_SUCCESS) { + Count = 2 - (RegI & 1); + RegI -= Count; + if (CurrentOffset++ >= OffsetInScope) { + Status = RtlpUnwindRestoreRegisterRange( + ContextRecord, + (RegI == 0) ? 
(-(LONG)RegSize) : (8 * RegI), + 23 + RegI, + Count, + UnwindParams); + } + } + } + + // + // If we succeeded, post-process the results a bit + // + + if (Status == STATUS_SUCCESS) { + + ContextRecord->Pc = ContextRecord->Ra; + *EstablisherFrame = ContextRecord->Sp; + + if (ARGUMENT_PRESENT(HandlerRoutine)) { + *HandlerRoutine = NULL; + } + *HandlerData = NULL; + } + + return Status; +} + +BOOL OOPStackUnwinderRISCV64::Unwind(T_CONTEXT * pContext) +{ + DWORD64 ImageBase = 0; + HRESULT hr = GetModuleBase(pContext->Pc, &ImageBase); + if (hr != S_OK) + return FALSE; + + PEXCEPTION_ROUTINE DummyHandlerRoutine; + PVOID DummyHandlerData; + DWORD64 DummyEstablisherFrame; + + DWORD64 startingPc = pContext->Pc; + DWORD64 startingSp = pContext->Sp; + + T_RUNTIME_FUNCTION Rfe; + if (FAILED(GetFunctionEntry(pContext->Pc, &Rfe, sizeof(Rfe)))) + return FALSE; + + if ((Rfe.UnwindData & 3) != 0) + { + hr = RtlpUnwindFunctionCompact(pContext->Pc - ImageBase, + &Rfe, + pContext, + &DummyEstablisherFrame, + &DummyHandlerRoutine, + &DummyHandlerData, + NULL); + + } + else + { + hr = RtlpUnwindFunctionFull(pContext->Pc - ImageBase, + ImageBase, + &Rfe, + pContext, + &DummyEstablisherFrame, + &DummyHandlerRoutine, + &DummyHandlerData, + NULL); + } + + // PC == 0 means unwinding is finished. + // Same if no forward progress is made + if (pContext->Pc == 0 || (startingPc == pContext->Pc && startingSp == pContext->Sp)) + return FALSE; + + return TRUE; +} + +BOOL DacUnwindStackFrame(T_CONTEXT *pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers) +{ + OOPStackUnwinderRISCV64 unwinder; + BOOL res = unwinder.Unwind(pContext); + + if (res && pContextPointers) + { + for (int i = 0; i < 11; i++) + { + *(&pContextPointers->S1 + i) = &pContext->S1 + i; + } + pContextPointers->Fp = &pContext->Fp; + pContextPointers->Ra = &pContext->Ra; + } + + return res; +} + +#if defined(HOST_UNIX) +PEXCEPTION_ROUTINE +RtlVirtualUnwind( + IN ULONG HandlerType, + IN ULONG64 ImageBase, + IN ULONG64 ControlPc, + IN PT_RUNTIME_FUNCTION FunctionEntry, + IN OUT PCONTEXT ContextRecord, + OUT PVOID *HandlerData, + OUT PULONG64 EstablisherFrame, + IN OUT PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL + ) +{ + PEXCEPTION_ROUTINE handlerRoutine; + HRESULT hr; + + DWORD64 startingPc = ControlPc; + DWORD64 startingSp = ContextRecord->Sp; + + T_RUNTIME_FUNCTION rfe; + + rfe.BeginAddress = FunctionEntry->BeginAddress; + rfe.UnwindData = FunctionEntry->UnwindData; + + RISCV64_UNWIND_PARAMS unwindParams; + unwindParams.ContextPointers = ContextPointers; + + if ((rfe.UnwindData & 3) != 0) + { + hr = RtlpUnwindFunctionCompact(ControlPc - ImageBase, + &rfe, + ContextRecord, + EstablisherFrame, + &handlerRoutine, + HandlerData, + &unwindParams); + + } + else + { + hr = RtlpUnwindFunctionFull(ControlPc - ImageBase, + ImageBase, + &rfe, + ContextRecord, + EstablisherFrame, + &handlerRoutine, + HandlerData, + &unwindParams); + } + + return handlerRoutine; +} +#endif diff --git a/src/coreclr/unwinder/riscv64/unwinder.h b/src/coreclr/unwinder/riscv64/unwinder.h index efcd109cceb5dc..e27a4b19e46f77 100644 --- a/src/coreclr/unwinder/riscv64/unwinder.h +++ b/src/coreclr/unwinder/riscv64/unwinder.h @@ -8,6 +8,45 @@ #include "baseunwinder.h" -#error "TODO-RISCV64: missing implementation" +// #error "TODO-RISCV64: missing implementation" +//--------------------------------------------------------------------------------------- +// +// See the comment for the base class code:OOPStackUnwinder. 
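+// OOPStackUnwinderRISCV64 parses the RISCV64 .pdata/.xdata out-of-process, following
+// the same shape as the other 64-bit unwinders; DacUnwindStackFrame above drives it
+// on behalf of the DAC.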
+// + +class OOPStackUnwinderRISCV64 : public OOPStackUnwinder +{ +public: + // Unwind the given CONTEXT to the caller CONTEXT. The CONTEXT will be overwritten. + BOOL Unwind(T_CONTEXT * pContext); + + // + // Everything below comes from dbghelp.dll. + // + +protected: + HRESULT UnwindPrologue(_In_ DWORD64 ImageBase, + _In_ DWORD64 ControlPc, + _In_ DWORD64 FrameBase, + _In_ _PIMAGE_RUNTIME_FUNCTION_ENTRY FunctionEntry, + __inout PT_CONTEXT ContextRecord); + + HRESULT VirtualUnwind(_In_ DWORD64 ImageBase, + _In_ DWORD64 ControlPc, + _In_ _PIMAGE_RUNTIME_FUNCTION_ENTRY FunctionEntry, + __inout PT_CONTEXT ContextRecord, + _Out_ PDWORD64 EstablisherFrame); + + DWORD64 LookupPrimaryUnwindInfo + (_In_ _PIMAGE_RUNTIME_FUNCTION_ENTRY FunctionEntry, + _In_ DWORD64 ImageBase, + _Out_ _PIMAGE_RUNTIME_FUNCTION_ENTRY PrimaryEntry); + + _PIMAGE_RUNTIME_FUNCTION_ENTRY SameFunction + (_In_ _PIMAGE_RUNTIME_FUNCTION_ENTRY FunctionEntry, + _In_ DWORD64 ImageBase, + _In_ DWORD64 ControlPc, + _Out_ _PIMAGE_RUNTIME_FUNCTION_ENTRY FunctionReturnBuffer); +}; #endif // __unwinder_riscv64__ diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 0901e06d1f3028..76f07e451c47c2 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -561,7 +561,7 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, /*static*/ CPU_Group_Info *CPUGroupInfo::m_CPUGroupInfoArray = NULL; /*static*/ LONG CPUGroupInfo::m_initialization = 0; -#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) // Calculate greatest common divisor DWORD GCD(DWORD u, DWORD v) { @@ -591,7 +591,7 @@ DWORD LCM(DWORD u, DWORD v) } CONTRACTL_END; -#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) BYTE *bBuffer = NULL; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pSLPIEx = NULL; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pRecord = NULL; @@ -672,7 +672,7 @@ DWORD LCM(DWORD u, DWORD v) } CONTRACTL_END; -#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) USHORT groupCount = 0; // On Windows 11+ and Windows Server 2022+, a process is no longer restricted to a single processor group by default. 
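The CPUGroupInfo hunks above and below only widen the existing architecture guards to include TARGET_RISCV64; the guarded code itself, including the GCD/LCM helpers named in the hunk context, is unchanged. For orientation, a generic sketch of such helpers (not copied from util.cpp; the DWORD typedef stands in for the runtime's own definition) looks like:

    typedef unsigned int DWORD;   // placeholder for the runtime's DWORD

    // Greatest common divisor via the Euclidean algorithm.
    DWORD GCD(DWORD u, DWORD v)
    {
        while (v != 0)
        {
            DWORD r = u % v;
            u = v;
            v = r;
        }
        return u;
    }

    // Least common multiple expressed through GCD; divide first to limit overflow.
    DWORD LCM(DWORD u, DWORD v)
    {
        return (u / GCD(u, v)) * v;
    }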
@@ -758,7 +758,7 @@ DWORD LCM(DWORD u, DWORD v) { LIMITED_METHOD_CONTRACT; -#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) WORD bTemp = 0; WORD bDiff = processor_number - bTemp; @@ -789,7 +789,7 @@ DWORD LCM(DWORD u, DWORD v) } CONTRACTL_END; -#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if !defined(FEATURE_NATIVEAOT) && (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups); PROCESSOR_NUMBER proc_no; @@ -838,7 +838,7 @@ DWORD LCM(DWORD u, DWORD v) } CONTRACTL_END; -#if (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) WORD i, minGroup = 0; DWORD minWeight = 0; @@ -880,7 +880,7 @@ DWORD LCM(DWORD u, DWORD v) /*static*/ void CPUGroupInfo::ClearCPUGroupAffinity(GROUP_AFFINITY *gf) { LIMITED_METHOD_CONTRACT; -#if (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if (defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups && m_threadAssignCpuGroups); WORD group = gf->Group; diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 7d7826b5a0d70a..f09f43c432d664 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -758,6 +758,14 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) + elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) + set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/calldescrworkerriscv64.S + ${ARCH_SOURCES_DIR}/crthelpers.S + ${ARCH_SOURCES_DIR}/pinvokestubs.S + ${ARCH_SOURCES_DIR}/thunktemplates.S + ) endif() endif(CLR_CMAKE_TARGET_WIN32) @@ -860,7 +868,22 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/singlestepper.cpp ) endif(CLR_CMAKE_HOST_UNIX) -elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +elseif(clr_cmake_target_arch_loongarch64) + set(vm_sources_dac_and_wks_arch + ${arch_sources_dir}/stubs.cpp + exceptionhandling.cpp + ) + + set(vm_headers_dac_and_wks_arch + ${arch_sources_dir}/virtualcallstubcpu.hpp + exceptionhandling.h + ) + + set(vm_sources_wks_arch + ${arch_sources_dir}/profiler.cpp + gcinfodecoder.cpp + ) +elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/stubs.cpp exceptionhandling.cpp diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index ea29ec2bdce028..39c3184aaea07f 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
// - #ifndef TARGET_ARM64 #error Should only include "cGenCpu.h" for ARM64 builds #endif diff --git a/src/coreclr/vm/callcounting.h b/src/coreclr/vm/callcounting.h index 3d25e1c2826267..f47bfc05c16757 100644 --- a/src/coreclr/vm/callcounting.h +++ b/src/coreclr/vm/callcounting.h @@ -99,6 +99,8 @@ class CallCountingStub static const int CodeSize = 32; #elif defined(TARGET_LOONGARCH64) static const int CodeSize = 40; +#elif defined(TARGET_RISCV64) + static const int CodeSize = 40; // TODO RISCV64 #endif private: diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 364c850fca49b3..6841f3eee9ca63 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -41,7 +41,7 @@ struct ArgLocDesc int m_byteStackIndex; // Stack offset in bytes (or -1) int m_byteStackSize; // Stack size in bytes -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) int m_structFields; // Struct field info when using Float-register except two-doubles case. #endif @@ -96,7 +96,7 @@ struct ArgLocDesc #if defined(TARGET_ARM64) m_hfaFieldSize = 0; #endif // defined(TARGET_ARM64) -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) m_structFields = STRUCT_NO_FLOAT_FIELD; #endif #if defined(UNIX_AMD64_ABI) @@ -164,6 +164,29 @@ struct TransitionBlock }; //TADDR padding; // Keep size of TransitionBlock as multiple of 16-byte. Simplifies code in PROLOG_WITH_TRANSITION_BLOCK ArgumentRegisters m_argumentRegisters; +#elif defined(TARGET_RISCV64) + union { + CalleeSavedRegisters m_calleeSavedRegisters; + struct { + INT64 s0; // frame pointer + TADDR m_ReturnAddress; + INT64 s1; + INT64 s2; + INT64 s3; + INT64 s4; + INT64 s5; + INT64 s6; + INT64 s7; + INT64 s8; + INT64 s9; + INT64 s10; + INT64 s11; + INT64 tp; + INT64 gp; + }; + }; + //TADDR padding; // Keep size of TransitionBlock as multiple of 16-byte. Simplifies code in PROLOG_WITH_TRANSITION_BLOCK + ArgumentRegisters m_argumentRegisters; #else PORTABILITY_ASSERT("TransitionBlock"); #endif @@ -505,6 +528,8 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE #elif defined(TARGET_LOONGARCH64) // Composites greater than 16 bytes are passed by reference return (size > ENREGISTERED_PARAMTYPE_MAXSIZE); +#elif defined(TARGET_RISCV64) + return (size > ENREGISTERED_PARAMTYPE_MAXSIZE); #else PORTABILITY_ASSERT("ArgIteratorTemplate::IsArgPassedByRef"); return FALSE; @@ -566,6 +591,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE return (m_argSize > ENREGISTERED_PARAMTYPE_MAXSIZE); } return FALSE; +#elif defined(TARGET_RISCV64) + if (m_argType == ELEMENT_TYPE_VALUETYPE) + { + _ASSERTE(!m_argTypeHandle.IsNull()); + return ((m_argSize > ENREGISTERED_PARAMTYPE_MAXSIZE) && (!m_argTypeHandle.IsHFA() || this->IsVarArg())); + } + return FALSE; #else PORTABILITY_ASSERT("ArgIteratorTemplate::IsArgPassedByRef"); return FALSE; @@ -627,7 +659,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE ArgLocDesc* GetArgLocDescForStructInRegs() { -#if defined(UNIX_AMD64_ABI) || defined (TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined (TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined (TARGET_RISCV64) return m_hasArgLocDescForStructInRegs ? &m_argLocDescForStructInRegs : NULL; #else return NULL; @@ -876,6 +908,56 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE #endif // TARGET_LOONGARCH64 +#ifdef TARGET_RISCV64 + // Get layout information for the argument that the ArgIterator is currently visiting. 
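+    // An offset inside the FloatArgumentRegisters area selects an FP argument register,
+    // an offset inside the general ArgumentRegisters area selects one or more integer
+    // argument registers, and any remaining offset is a byte index into the outgoing
+    // stack arguments; the ArgLocDesc fields are filled in accordingly below.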
+ void GetArgLoc(int argOffset, ArgLocDesc *pLoc) + { + LIMITED_METHOD_CONTRACT; + + pLoc->Init(); + + if (m_hasArgLocDescForStructInRegs) + { + *pLoc = m_argLocDescForStructInRegs; + return; + } + + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) + { + // TODO-RISCV64: support SIMD. + // Dividing by 8 as size of each register in FloatArgumentRegisters is 8 bytes. + pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 8; + + assert(!m_argTypeHandle.IsHFA()); + + pLoc->m_cFloatReg = 1; + + return; + } + + int cSlots = (GetArgSize() + 7)/ 8; + + // Composites greater than 16bytes are passed by reference + if (GetArgType() == ELEMENT_TYPE_VALUETYPE && GetArgSize() > ENREGISTERED_PARAMTYPE_MAXSIZE) + { + cSlots = 1; + } + + if (!TransitionBlock::IsStackArgumentOffset(argOffset)) + { + // At least one used integer register passed. + pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); + pLoc->m_cGenReg = cSlots; + } + else + { + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + pLoc->m_byteStackSize = cSlots << 3; + } + + return; + } +#endif // TARGET_RISCV64 protected: DWORD m_dwFlags; // Cached flags int m_nSizeOfArgStack; // Cached value of SizeOfArgStack @@ -886,10 +968,10 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE CorElementType m_argType; int m_argSize; TypeHandle m_argTypeHandle; -#if (defined(TARGET_AMD64) && defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if (defined(TARGET_AMD64) && defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) ArgLocDesc m_argLocDescForStructInRegs; bool m_hasArgLocDescForStructInRegs; -#endif // (TARGET_AMD64 && UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // (TARGET_AMD64 && UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 int m_ofsStack; // Current position of the stack iterator, in bytes @@ -923,6 +1005,12 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE int m_idxFPReg; // Next FP register to be assigned a value #endif +#ifdef TARGET_RISCV64 + int m_idxGenReg; // Next general register to be assigned a value + int m_idxStack; // Next stack slot to be assigned a value + int m_idxFPReg; // Next FP register to be assigned a value +#endif + enum { ITERATION_STARTED = 0x0001, // Started iterating over arguments SIZE_OF_ARG_STACK_COMPUTED = 0x0002, @@ -1170,6 +1258,10 @@ int ArgIteratorTemplate::GetNextOffset() m_idxGenReg = numRegistersUsed; m_ofsStack = 0; m_idxFPReg = 0; +#elif defined(TARGET_RISCV64) + m_idxGenReg = numRegistersUsed; + m_ofsStack = 0; + m_idxFPReg = 0; #else PORTABILITY_ASSERT("ArgIteratorTemplate::GetNextOffset"); #endif @@ -1199,7 +1291,7 @@ int ArgIteratorTemplate::GetNextOffset() m_argSize = argSize; m_argTypeHandle = thValueType; -#if defined(UNIX_AMD64_ABI) || defined (TARGET_ARM64) || defined (TARGET_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined (TARGET_ARM64) || defined (TARGET_LOONGARCH64) || defined (TARGET_RISCV64) m_hasArgLocDescForStructInRegs = false; #endif @@ -1750,6 +1842,130 @@ int ArgIteratorTemplate::GetNextOffset() int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; m_ofsStack += ALIGN_UP(cbArg, TARGET_POINTER_SIZE); + return argOfs; +#elif defined(TARGET_RISCV64) + + int cFPRegs = 0; + int flags = 0; + + switch (argType) + { + + case ELEMENT_TYPE_R4: + // 32-bit floating point argument. 
+ cFPRegs = 1; + break; + + case ELEMENT_TYPE_R8: + // 64-bit floating point argument. + cFPRegs = 1; + break; + + case ELEMENT_TYPE_VALUETYPE: + { + // Handle struct which containing floats or doubles that can be passed + // in FP registers if possible. + + // Composite greater than 16bytes should be passed by reference + if (argSize > ENREGISTERED_PARAMTYPE_MAXSIZE) + { + argSize = sizeof(TADDR); + } + else + { + MethodTable* pMethodTable = nullptr; + + if (!thValueType.IsTypeDesc()) + pMethodTable = thValueType.AsMethodTable(); + else + { + _ASSERTE(thValueType.IsNativeValueType()); + pMethodTable = thValueType.AsNativeValueType(); + } + _ASSERTE(pMethodTable != nullptr); + flags = MethodTable::GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable); + if (flags & STRUCT_HAS_FLOAT_FIELDS_MASK) + { + cFPRegs = (flags & STRUCT_FLOAT_FIELD_ONLY_TWO) ? 2 : 1; + } + } + + break; + } + + default: + break; + } + + const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); + const bool isFloatHfa = thValueType.IsFloatHfa(); + const int cbArg = StackElemSize(argSize, isValueType, isFloatHfa); + + if (cFPRegs > 0 && !this->IsVarArg()) + { + if (flags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) + { + assert(cFPRegs == 1); + assert((STRUCT_FLOAT_FIELD_FIRST == (flags & STRUCT_HAS_FLOAT_FIELDS_MASK)) || (STRUCT_FLOAT_FIELD_SECOND == (flags & STRUCT_HAS_FLOAT_FIELDS_MASK))); + + if ((1 + m_idxFPReg <= NUM_ARGUMENT_REGISTERS) && (m_idxGenReg + 1 <= NUM_ARGUMENT_REGISTERS)) + { + m_argLocDescForStructInRegs.Init(); + m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg; + m_argLocDescForStructInRegs.m_cFloatReg = 1; + int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8; + m_idxFPReg += 1; + + m_argLocDescForStructInRegs.m_structFields = flags; + + m_argLocDescForStructInRegs.m_idxGenReg = m_idxGenReg; + m_argLocDescForStructInRegs.m_cGenReg = 1; + m_idxGenReg += 1; + + m_hasArgLocDescForStructInRegs = true; + + return argOfs; + } + } + else if (cFPRegs + m_idxFPReg <= NUM_ARGUMENT_REGISTERS) + { + int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8; + if (flags == STRUCT_FLOAT_FIELD_ONLY_TWO) // struct with two float-fields. 
+ { + m_argLocDescForStructInRegs.Init(); + m_hasArgLocDescForStructInRegs = true; + m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg; + assert(cFPRegs == 2); + m_argLocDescForStructInRegs.m_cFloatReg = 2; + assert(argSize == 8); + m_argLocDescForStructInRegs.m_structFields = STRUCT_FLOAT_FIELD_ONLY_TWO; + } + m_idxFPReg += cFPRegs; + return argOfs; + } + } + + { + const int regSlots = ALIGN_UP(cbArg, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; + if (m_idxGenReg + regSlots <= NUM_ARGUMENT_REGISTERS) + { + int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; + m_idxGenReg += regSlots; + return argOfs; + } + else if (m_idxGenReg < NUM_ARGUMENT_REGISTERS) + { + int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; + m_ofsStack += (m_idxGenReg + regSlots - NUM_ARGUMENT_REGISTERS)*8; + assert(m_ofsStack == 8); + m_idxGenReg = NUM_ARGUMENT_REGISTERS; + return argOfs; + } + } + + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; + m_ofsStack += ALIGN_UP(cbArg, TARGET_POINTER_SIZE); + return argOfs; #else PORTABILITY_ASSERT("ArgIteratorTemplate::GetNextOffset"); @@ -1786,7 +2002,7 @@ void ArgIteratorTemplate::ComputeReturnFlags() break; case ELEMENT_TYPE_R4: -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) flags |= STRUCT_FLOAT_FIELD_ONLY_ONE << RETURN_FP_SIZE_SHIFT; #else #ifndef ARM_SOFTFP @@ -1796,7 +2012,7 @@ void ArgIteratorTemplate::ComputeReturnFlags() break; case ELEMENT_TYPE_R8: -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) flags |= (STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8) << RETURN_FP_SIZE_SHIFT; #else #ifndef ARM_SOFTFP @@ -1875,6 +2091,15 @@ void ArgIteratorTemplate::ComputeReturnFlags() flags = (MethodTable::GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable) & 0xff) << RETURN_FP_SIZE_SHIFT; break; } +#elif defined(TARGET_RISCV64) + if (size <= ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE) + { + assert(!thValueType.IsTypeDesc()); + + MethodTable *pMethodTable = thValueType.AsMethodTable(); + flags = (MethodTable::GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable) & 0xff) << RETURN_FP_SIZE_SHIFT; + break; + } #else if (size <= ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE) break; diff --git a/src/coreclr/vm/ceeload.h b/src/coreclr/vm/ceeload.h index f1cf9000fea411..de2ad6bdcc9b59 100644 --- a/src/coreclr/vm/ceeload.h +++ b/src/coreclr/vm/ceeload.h @@ -87,6 +87,8 @@ class JITInlineTrackingMap; #define NATIVE_SYMBOL_READER_DLL W("Microsoft.DiaSymReader.Native.arm64.dll") #elif defined(HOST_LOONGARCH64) #define NATIVE_SYMBOL_READER_DLL W("Microsoft.DiaSymReader.Native.loongarch64.dll") +#elif defined(HOST_RISCV64) +#define NATIVE_SYMBOL_READER_DLL W("Microsoft.DiaSymReader.Native.riscv64.dll") #endif typedef DPTR(JITInlineTrackingMap) PTR_JITInlineTrackingMap; diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 471bd1628d32a3..7f71ea1ceafd55 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -805,7 +805,7 @@ ExecutionManager::DeleteRangeHelper //----------------------------------------------------------------------------- -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define EXCEPTION_DATA_SUPPORTS_FUNCTION_FRAGMENTS #endif @@ -909,6 +909,29 @@ BOOL IsFunctionFragment(TADDR baseAddress, 
PTR_RUNTIME_FUNCTION pFunctionEntry) pUnwindCodes += EpilogCount; } + return ((*pUnwindCodes & 0xFF) == 0xE5); +#elif defined(TARGET_RISCV64) + int EpilogCount = (int)(unwindHeader >> 22) & 0x1F; + int CodeWords = unwindHeader >> 27; + PTR_DWORD pUnwindCodes = (PTR_DWORD)(baseAddress + pFunctionEntry->UnwindData); + // Skip header. + pUnwindCodes++; + + // Skip extended header. + if ((CodeWords == 0) && (EpilogCount == 0)) + { + EpilogCount = (*pUnwindCodes) & 0xFFFF; + pUnwindCodes++; + } + + // Skip epilog scopes. + BOOL Ebit = (unwindHeader >> 21) & 0x1; + if (!Ebit && (EpilogCount != 0)) + { + // EpilogCount is the number of exception scopes defined right after the unwindHeader + pUnwindCodes += EpilogCount; + } + return ((*pUnwindCodes & 0xFF) == 0xE5); #else PORTABILITY_ASSERT("IsFunctionFragnent - NYI on this platform"); @@ -1113,6 +1136,44 @@ PTR_VOID GetUnwindDataBlob(TADDR moduleBase, PTR_RUNTIME_FUNCTION pRuntimeFuncti *pSize = size; return xdata; +#elif defined(TARGET_RISCV64) + // TODO: maybe optimize further. + // if this function uses packed unwind data then at least one of the two least significant bits + // will be non-zero. if this is the case then there will be no xdata record to enumerate. + _ASSERTE((pRuntimeFunction->UnwindData & 0x3) == 0); + + // compute the size of the unwind info + PTR_ULONG xdata = dac_cast(pRuntimeFunction->UnwindData + moduleBase); + ULONG epilogScopes = 0; + ULONG unwindWords = 0; + ULONG size = 0; + + //If both Epilog Count and Code Word is not zero + //Info of Epilog and Unwind scopes are given by 1 word header + //Otherwise this info is given by a 2 word header + if ((xdata[0] >> 27) != 0) + { + size = 4; + epilogScopes = (xdata[0] >> 22) & 0x1f; + unwindWords = (xdata[0] >> 27) & 0x1f; + } + else + { + size = 8; + epilogScopes = xdata[1] & 0xffff; + unwindWords = (xdata[1] >> 16) & 0xff; + } + + if (!(xdata[0] & (1 << 21))) + size += 4 * epilogScopes; + + size += 4 * unwindWords; + + _ASSERTE(xdata[0] & (1 << 20)); // personality routine should be always present + size += 4; // exception handler RVA + + *pSize = size; + return xdata; #else PORTABILITY_ASSERT("GetUnwindDataBlob"); @@ -2235,6 +2296,8 @@ BOOL EEJitManager::LoadJIT() altJitName = MAKEDLLNAME_W(W("clrjit_unix_x64_x64")); #elif defined(TARGET_LOONGARCH64) altJitName = MAKEDLLNAME_W(W("clrjit_unix_loongarch64_loongarch64")); +#elif defined(TARGET_RISCV64) + altJitName = MAKEDLLNAME_W(W("clrjit_unix_riscv64_riscv64")); #endif #endif // TARGET_WINDOWS @@ -2623,7 +2686,7 @@ static size_t GetDefaultReserveForJumpStubs(size_t codeHeapSize) { LIMITED_METHOD_CONTRACT; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // // Keep a small default reserve at the end of the codeheap for jump stubs. It should reduce // chance that we won't be able allocate jump stub because of lack of suitable address space. 
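The RISCV64 arms of IsFunctionFragment and GetUnwindDataBlob above, like RtlpUnwindFunctionFull earlier in this diff, all parse the same one- or two-word .xdata header. A self-contained sketch of that decoding (the struct and helper names are illustrative, not part of the patch):

    #include <stdint.h>

    typedef struct
    {
        uint32_t FunctionLength;   // bits 0-17, in 4-byte instruction units
        uint32_t EpilogCount;      // epilog scope count, or a code index when the E bit is set
        uint32_t CodeWords;        // number of 32-bit unwind-code words
        int      SingleEpilog;     // E bit (bit 21)
        int      HasHandler;       // bit 20: personality routine RVA and handler data follow
    } XdataHeader;

    // Decodes the header and returns a pointer just past it (one or two words).
    static const uint32_t* DecodeXdataHeader(const uint32_t* xdata, XdataHeader* out)
    {
        uint32_t header = xdata[0];
        out->FunctionLength = header & 0x3ffff;
        out->EpilogCount    = (header >> 22) & 0x1f;
        out->CodeWords      = (header >> 27) & 0x1f;
        out->SingleEpilog   = (header >> 21) & 0x1;
        out->HasHandler     = (header >> 20) & 0x1;

        if (out->EpilogCount == 0 && out->CodeWords == 0)
        {
            // Extended header word: 16-bit epilog count, 8-bit code-word count.
            out->EpilogCount = xdata[1] & 0xffff;
            out->CodeWords   = (xdata[1] >> 16) & 0xff;
            return xdata + 2;
        }
        return xdata + 1;
    }

With the header decoded, IsFunctionFragment only needs to skip the epilog scopes and test whether the first unwind code byte is end_c (0xE5), which is exactly what the hunk above does.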
@@ -2675,7 +2738,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap bool fAllocatedFromEmergencyJumpStubReserve = false; size_t allocationSize = pCodeHeap->m_LoaderHeap.AllocMem_TotalSize(initialRequestSize); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) allocationSize += pCodeHeap->m_LoaderHeap.AllocMem_TotalSize(JUMP_ALLOCATE_SIZE); #endif pBaseAddr = (BYTE *)pInfo->m_pAllocator->GetCodeHeapInitialBlock(loAddr, hiAddr, (DWORD)allocationSize, &dwSizeAcquiredFromInitialBlock); @@ -2722,7 +2785,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap // this first allocation is critical as it sets up correctly the loader heap info HeapList *pHp = new HeapList; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) pHp->CLRPersonalityRoutine = (BYTE *)pCodeHeap->m_LoaderHeap.AllocMem(JUMP_ALLOCATE_SIZE); #else // Ensure that the heap has a reserved block of memory and so the GetReservedBytesFree() @@ -2875,7 +2938,7 @@ HeapList* EEJitManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHeapLi size_t reserveSize = initialRequestSize; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) reserveSize += JUMP_ALLOCATE_SIZE; #endif @@ -4422,7 +4485,7 @@ PTR_RUNTIME_FUNCTION EEJitManager::LazyGetFunctionEntry(EECodeInfo * pCodeInfo) if (RUNTIME_FUNCTION__BeginAddress(pFunctionEntry) <= address && address < RUNTIME_FUNCTION__EndAddress(pFunctionEntry, baseAddress)) { -#if defined(EXCEPTION_DATA_SUPPORTS_FUNCTION_FRAGMENTS) && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#if defined(EXCEPTION_DATA_SUPPORTS_FUNCTION_FRAGMENTS) && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)) // If we might have fragmented unwind, and we're on ARM64/LoongArch64, // make sure to returning the root record, // as the trailing records don't have prolog unwind codes. 
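The comment above is the reason LazyGetFunctionEntry must return the root record when the entry found for a PC is only a fragment: fragments carry no prolog unwind codes. A hedged sketch of that lookup, with an illustrative helper name (the real logic lives inside EEJitManager::LazyGetFunctionEntry):

    // Walk back through the address-sorted RUNTIME_FUNCTION table until a non-fragment
    // entry is found; the root record precedes its fragments and owns the prolog codes.
    PTR_RUNTIME_FUNCTION FindRootFunctionEntry(PTR_RUNTIME_FUNCTION pFunctionEntry, TADDR baseAddress)
    {
        while (IsFunctionFragment(baseAddress, pFunctionEntry))
        {
            pFunctionEntry--;
        }
        return pFunctionEntry;
    }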
diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 8e432e6e5209ba..a277674e5a3644 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -478,13 +478,13 @@ struct HeapList size_t maxCodeHeapSize;// Size of the entire contiguous block of memory size_t reserveForJumpStubs; // Amount of memory reserved for jump stubs in this block -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) BYTE* CLRPersonalityRoutine; // jump thunk to personality routine #endif TADDR GetModuleBase() { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return (TADDR)CLRPersonalityRoutine; #else return (TADDR)mapBase; diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 5a140d6ed0a13d..897adc463c82fd 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -437,7 +437,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) TrackAllocation *pTracker = NULL; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) pTracker = AllocMemory_NoThrow(0, JUMP_ALLOCATE_SIZE, sizeof(void*), 0); if (pTracker == NULL) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 640591c7d9b71f..1e16cb811bc6f9 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1486,7 +1486,7 @@ bool EECodeManager::IsGcSafe( EECodeInfo *pCodeInfo, return gcInfoDecoder.IsInterruptible(); } -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool EECodeManager::HasTailCalls( EECodeInfo *pCodeInfo) { CONTRACTL { @@ -1504,7 +1504,7 @@ bool EECodeManager::HasTailCalls( EECodeInfo *pCodeInfo) return gcInfoDecoder.HasTailCalls(); } -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 #if defined(TARGET_AMD64) && defined(_DEBUG) diff --git a/src/coreclr/vm/encee.cpp b/src/coreclr/vm/encee.cpp index 37625bbce4bbf4..19c777431981ff 100644 --- a/src/coreclr/vm/encee.cpp +++ b/src/coreclr/vm/encee.cpp @@ -608,7 +608,7 @@ HRESULT EditAndContinueModule::ResumeInUpdatedFunction( SIZE_T newILOffset, CONTEXT *pOrigContext) { -#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) return E_NOTIMPL; #else LOG((LF_ENC, LL_INFO100, "EnCModule::ResumeInUpdatedFunction for %s at IL offset 0x%x, ", diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 1a9666d7d9f7e3..44900ba17d2ff1 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6278,9 +6278,9 @@ IsDebuggerFault(EXCEPTION_RECORD *pExceptionRecord, #endif // TARGET_UNIX -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) EXTERN_C void JIT_StackProbe_End(); -#endif // TARGET_ARM64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef FEATURE_EH_FUNCLETS @@ -6345,9 +6345,9 @@ bool 
IsIPInMarkedJitHelper(UINT_PTR uControlPc) CHECK_RANGE(JIT_WriteBarrier) CHECK_RANGE(JIT_CheckedWriteBarrier) CHECK_RANGE(JIT_ByRefWriteBarrier) -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !(TARGET_RISCV64) CHECK_RANGE(JIT_StackProbe) -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #else #ifdef TARGET_UNIX CHECK_RANGE(JIT_WriteBarrierGroup) @@ -6469,7 +6469,7 @@ AdjustContextForJITHelpers( Thread::VirtualUnwindToFirstManagedCallFrame(pContext); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // We had an AV in the writebarrier that needs to be treated // as originating in managed code. At this point, the stack (growing // from left->right) looks like this: @@ -6493,7 +6493,7 @@ AdjustContextForJITHelpers( // Now we save the address back into the context so that it gets used // as the faulting address. SetIP(pContext, ControlPCPostAdjustment); -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 // Unwind the frame chain - On Win64, this is required since we may handle the managed fault and to do so, // we will replace the exception context with the managed context and "continue execution" there. Thus, we do not diff --git a/src/coreclr/vm/fieldmarshaler.cpp b/src/coreclr/vm/fieldmarshaler.cpp index ea0bbb871cf4fc..3be82af9fcd4cc 100644 --- a/src/coreclr/vm/fieldmarshaler.cpp +++ b/src/coreclr/vm/fieldmarshaler.cpp @@ -421,6 +421,7 @@ UINT32 NativeFieldDescriptor::AlignmentRequirement() const } } +#if 0 PTR_MethodTable NativeFieldDescriptor::GetNestedNativeMethodTable() const { CONTRACT(PTR_MethodTable) @@ -435,6 +436,7 @@ PTR_MethodTable NativeFieldDescriptor::GetNestedNativeMethodTable() const RETURN nestedTypeAndCount.m_pNestedType; } +#endif PTR_FieldDesc NativeFieldDescriptor::GetFieldDesc() const { diff --git a/src/coreclr/vm/fieldmarshaler.h b/src/coreclr/vm/fieldmarshaler.h index 83b2c79fb4f055..80ef481ca8fdd0 100644 --- a/src/coreclr/vm/fieldmarshaler.h +++ b/src/coreclr/vm/fieldmarshaler.h @@ -92,7 +92,20 @@ class NativeFieldDescriptor return m_category; } - PTR_MethodTable GetNestedNativeMethodTable() const; + PTR_MethodTable GetNestedNativeMethodTable() const + { + CONTRACT(PTR_MethodTable) + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(IsNestedType()); + POSTCONDITION(CheckPointer(RETVAL)); + } + CONTRACT_END; + + RETURN nestedTypeAndCount.m_pNestedType; + } ULONG GetNumElements() const { diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index 2261fe4431be1c..83c218b149bad8 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -863,6 +863,9 @@ class RedirectedThreadFrame : public ResumableFrame #elif defined(TARGET_LOONGARCH64) Object** firstIntReg = (Object**)&this->GetContext()->Tp; Object** lastIntReg = (Object**)&this->GetContext()->S8; +#elif defined(TARGET_RISCV64) + Object** firstIntReg = (Object**)&this->GetContext()->Gp; + Object** lastIntReg = (Object**)&this->GetContext()->T6; #else _ASSERTE(!"nyi for platform"); #endif @@ -1904,7 +1907,7 @@ class UnmanagedToManagedFrame : public Frame TADDR m_ReturnAddress; TADDR m_x8; // ret buff arg ArgumentRegisters m_argumentRegisters; -#elif defined (TARGET_LOONGARCH64) +#elif defined (TARGET_LOONGARCH64) || defined (TARGET_RISCV64) TADDR m_fp; 
TADDR m_ReturnAddress; ArgumentRegisters m_argumentRegisters; diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index 71a49b63be540a..d69c0906580c3e 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -36,7 +36,7 @@ MethodDesc* AsMethodDesc(size_t addr); static PBYTE getTargetOfCall(PBYTE instrPtr, PCONTEXT regs, PBYTE*nextInstr); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) static void replaceSafePointInstructionWithGcStressInstr(UINT32 safePointOffset, LPVOID codeStart); static bool replaceInterruptibleRangesWithGcStressInstr (UINT32 startOffset, UINT32 stopOffset, LPVOID codeStart); #endif @@ -97,7 +97,7 @@ bool IsGcCoverageInterruptInstruction(PBYTE instrPtr) { UINT32 instrVal; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) instrVal = *reinterpret_cast(instrPtr); #elif defined(TARGET_ARM) size_t instrLen = GetARMInstructionLength(instrPtr); @@ -118,7 +118,7 @@ bool IsGcCoverageInterruptInstruction(PBYTE instrPtr) bool IsOriginalInstruction(PBYTE instrPtr, GCCoverageInfo* gcCover, DWORD offset) { -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) UINT32 instrVal = *reinterpret_cast(instrPtr); UINT32 origInstrVal = *reinterpret_cast(gcCover->savedCode + offset); return (instrVal == origInstrVal); @@ -174,7 +174,7 @@ void SetupAndSprinkleBreakpoints( fZapped); // This is not required for ARM* as the above call does the work for both hot & cold regions -#if !defined(TARGET_ARM) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) if (gcCover->methodRegion.coldSize != 0) { gcCover->SprinkleBreakpoints(gcCover->savedCode + gcCover->methodRegion.hotSize, @@ -341,6 +341,8 @@ void ReplaceInstrAfterCall(PBYTE instrToReplace, MethodDesc* callMD) *(DWORD*)instrToReplace = INTERRUPT_INSTR_PROTECT_RET; else *(DWORD*)instrToReplace = INTERRUPT_INSTR; +#elif defined(TARGET_RISCV64) +// #error TODO RISCV64 #else _ASSERTE(!"not implemented for platform"); #endif @@ -624,7 +626,7 @@ void GCCoverageInfo::SprinkleBreakpoints( if ((regionOffsetAdj==0) && (*codeStart != INTERRUPT_INSTR)) doingEpilogChecks = false; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //Save the method code from hotRegion memcpy(saveAddr, (BYTE*)methodRegion.hotStartAddress, methodRegion.hotSize); @@ -668,7 +670,7 @@ void GCCoverageInfo::SprinkleBreakpoints( #endif // TARGET_X86 } -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED @@ -769,6 +771,8 @@ void replaceSafePointInstructionWithGcStressInstr(UINT32 safePointOffset, LPVOID { instructionIsACallThroughRegister = TRUE; } +#elif defined(TARGET_RISCV64) +// #error TODO RISCV #endif // _TARGET_XXXX_ // safe point must always be after a call instruction @@ -792,7 +796,7 @@ void replaceSafePointInstructionWithGcStressInstr(UINT32 safePointOffset, LPVOID // safe point will be 
replaced with appropriate illegal instruction at execution time when reg value is known #if defined(TARGET_ARM) *((WORD*)instrPtrWriterHolder.GetRW()) = INTERRUPT_INSTR_CALL; -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) *((DWORD*)instrPtrWriterHolder.GetRW()) = INTERRUPT_INSTR_CALL; #endif // _TARGET_XXXX_ } @@ -913,7 +917,7 @@ bool replaceInterruptibleRangesWithGcStressInstr (UINT32 startOffset, UINT32 sto } instrPtrRW += instrLen; -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) *((DWORD*)instrPtrRW) = INTERRUPT_INSTR; instrPtrRW += 4; #endif // TARGET_XXXX_ @@ -1015,6 +1019,8 @@ static PBYTE getTargetOfCall(PBYTE instrPtr, PCONTEXT regs, PBYTE* nextInstr) { { return 0; // Fail } +#elif defined(TARGET_RISCV64) +// #error TODO RISCV64 #endif #ifdef TARGET_AMD64 @@ -1256,6 +1262,8 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr, GCCoverageI *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; #elif defined(TARGET_LOONGARCH64) *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; +#elif defined(TARGET_RISCV64) + *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; #else *(BYTE *)instrPtrWriterHolder.GetRW() = *savedInstrPtr; #endif @@ -1478,6 +1486,14 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) atCall = (instrVal == INTERRUPT_INSTR_CALL); afterCallProtect[0] = (instrVal == INTERRUPT_INSTR_PROTECT_RET); +#elif defined(TARGET_RISCV64) + _ASSERTE(!"TODO RISCV64 NYI"); + DWORD instrVal = *(DWORD *)instrPtr; + forceStack[6] = &instrVal; // This is so I can see it fastchecked + + atCall = (instrVal == INTERRUPT_INSTR_CALL); + afterCallProtect[0] = (instrVal == INTERRUPT_INSTR_PROTECT_RET); +// #error TODO RISCV64 #endif // _TARGET_* if (!IsGcCoverageInterruptInstruction(instrPtr)) @@ -1596,7 +1612,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) } #endif // TARGET_X86 -#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) /* In non-fully interruptible code, if the EIP is just after a call instr means something different because it expects that we are IN the @@ -1652,6 +1668,9 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) *(DWORD*)nextInstrWriterHolder.GetRW() = INTERRUPT_INSTR; #elif defined(TARGET_LOONGARCH64) *(DWORD*)nextInstrWriterHolder.GetRW() = INTERRUPT_INSTR; +#elif defined(TARGET_RISCV64) + _ASSERTE(!"TODO RISCV64 NYI"); + *(DWORD*)nextInstrWriterHolder.GetRW() = INTERRUPT_INSTR; #else *nextInstrWriterHolder.GetRW() = INTERRUPT_INSTR; #endif @@ -1734,6 +1753,8 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) retValRegs[numberOfRegs++] = regs->X0; #elif defined(TARGET_LOONGARCH64) retValRegs[numberOfRegs++] = regs->A0; +#elif defined(TARGET_RISCV64) + retValRegs[numberOfRegs++] = regs->A0; #endif // TARGET_ARM64 } @@ -1787,6 +1808,8 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) regs->X[0] = retValRegs[0]; #elif defined(TARGET_LOONGARCH64) regs->A0 = retValRegs[0]; +#elif defined(TARGET_RISCV64) + regs->A0 = retValRegs[0]; #else PORTABILITY_ASSERT("DoGCStress - return register"); 
#endif diff --git a/src/coreclr/vm/gccover.h b/src/coreclr/vm/gccover.h index 35207372671d78..3f0a04bcac5a9e 100644 --- a/src/coreclr/vm/gccover.h +++ b/src/coreclr/vm/gccover.h @@ -112,6 +112,12 @@ typedef DPTR(GCCoverageInfo) PTR_GCCoverageInfo; // see code:GCCoverageInfo::sav #define INTERRUPT_INSTR_CALL 0xffffff0e #define INTERRUPT_INSTR_PROTECT_RET 0xffffff0d +#elif defined(TARGET_RISCV64) +// TODO RISCV64 NYI +#define INTERRUPT_INSTR 0xBADC0DE0 +#define INTERRUPT_INSTR_CALL 0xBADC0DE1 +#define INTERRUPT_INSTR_PROTECT_RET 0xBADC0DE2 + #endif // _TARGET_* // The body of this method is in this header file to allow @@ -174,6 +180,9 @@ inline bool IsGcCoverageInterruptInstructionVal(UINT32 instrVal) return false; } } +#elif defined(TARGET_RISCV64) + _ASSERTE(!"TODO RISCV64 NYI"); + return false; #else // x64 and x86 diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 1ecb498794c4f6..7d9843e062bf07 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -927,7 +927,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) // On architectures with strong ordering, we only need to prevent compiler reordering. // Otherwise we put a process-wide fence here (so that we could use an ordinary read in the barrier) -#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) +#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) if (!is_runtime_suspended) { // If runtime is not suspended, force all threads to see the changed table before seeing updated heap boundaries. @@ -939,7 +939,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) g_lowest_address = args->lowest_address; g_highest_address = args->highest_address; -#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) +#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) // Need to reupdate for changes to g_highest_address g_lowest_address stompWBCompleteActions |= ::StompWriteBarrierResize(is_runtime_suspended, args->requires_upper_bounds_check); @@ -979,7 +979,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) // (we care only about managed threads and suspend/resume will do full fences - good enough for us). 
// -#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) +#if defined(HOST_ARM64) || defined(HOST_ARM) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) is_runtime_suspended = (stompWBCompleteActions & SWB_EE_RESTART) || is_runtime_suspended; if (!is_runtime_suspended) { diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 67fbb3b25e2548..4acd0b12121e03 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -133,7 +133,7 @@ GcInfoDecoder::GcInfoDecoder( int hasStackBaseRegister = headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER; #ifdef TARGET_AMD64 m_WantsReportOnlyLeaf = ((headerFlags & GC_INFO_WANTS_REPORT_ONLY_LEAF) != 0); -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) m_HasTailCalls = ((headerFlags & GC_INFO_HAS_TAILCALLS) != 0); #endif // TARGET_AMD64 int hasEncInfo = headerFlags & GC_INFO_HAS_EDIT_AND_CONTINUE_INFO; @@ -144,7 +144,7 @@ GcInfoDecoder::GcInfoDecoder( (ReturnKind)((UINT32)m_Reader.Read(returnKindBits)); remainingFlags &= ~(DECODE_RETURN_KIND | DECODE_VARARG); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) remainingFlags &= ~DECODE_HAS_TAILCALLS; #endif if (remainingFlags == 0) @@ -383,7 +383,7 @@ bool GcInfoDecoder::IsSafePoint(UINT32 codeOffset) if(m_NumSafePoints == 0) return false; -#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Safepoints are encoded with a -1 adjustment codeOffset--; #endif @@ -403,7 +403,7 @@ UINT32 GcInfoDecoder::FindSafePoint(UINT32 breakOffset) const UINT32 numBitsPerOffset = CeilOfLog2(NORMALIZE_CODE_OFFSET(m_CodeLength)); UINT32 result = m_NumSafePoints; -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Safepoints are encoded with a -1 adjustment // but normalizing them masks off the low order bit // Thus only bother looking if the address is odd @@ -450,7 +450,7 @@ void GcInfoDecoder::EnumerateSafePoints(EnumerateSafePointsCallback *pCallback, UINT32 normOffset = (UINT32)m_Reader.Read(numBitsPerOffset); UINT32 offset = DENORMALIZE_CODE_OFFSET(normOffset) + 2; -#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Safepoints are encoded with a -1 adjustment offset--; #endif @@ -536,13 +536,13 @@ bool GcInfoDecoder::GetIsVarArg() return m_IsVarArg; } -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool GcInfoDecoder::HasTailCalls() { _ASSERTE( m_Flags & DECODE_HAS_TAILCALLS ); return m_HasTailCalls; } -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 bool GcInfoDecoder::WantsReportOnlyLeaf() { @@ -1893,6 +1893,144 @@ void 
GcInfoDecoder::ReportRegisterToGC( pCallBack(hCallBack, pObjRef, gcFlags DAC_ARG(DacSlotLocation(regNum, 0, false))); } +#elif defined(TARGET_RISCV64) + +#if defined(TARGET_UNIX) && !defined(FEATURE_NATIVEAOT) +OBJECTREF* GcInfoDecoder::GetCapturedRegister( + int regNum, + PREGDISPLAY pRD + ) +{ + _ASSERTE(regNum >= 1 && regNum <= 31); + + // The fields of CONTEXT are in the same order as + // the processor encoding numbers. + + DWORD64 *pR0 = &pRD->pCurrentContext->R0; + + return (OBJECTREF*)(pR0 + regNum); +} +#endif // TARGET_UNIX && !FEATURE_NATIVEAOT + +OBJECTREF* GcInfoDecoder::GetRegisterSlot( + int regNum, + PREGDISPLAY pRD + ) +{ + _ASSERTE((regNum == 1) || (regNum >= 5 && regNum <= 31)); + +#ifdef FEATURE_NATIVEAOT + PTR_UIntNative* ppReg = &pRD->pR0; + + return (OBJECTREF*)*(ppReg + regNum); +#else + if(regNum == 1) + { + return (OBJECTREF*) pRD->pCurrentContextPointers->Ra; + } + else if (regNum < 8) + { + return (OBJECTREF*)*(DWORD64**)(&pRD->volatileCurrContextPointers.T0 + (regNum - 5)); + } + else if(regNum == 8) + { + return (OBJECTREF*) pRD->pCurrentContextPointers->Fp; + } + else if (regNum == 9) + { + return (OBJECTREF*) pRD->pCurrentContextPointers->S1; + } + else if (regNum < 18) + { + return (OBJECTREF*)*(DWORD64**)(&pRD->volatileCurrContextPointers.A0 + (regNum - 10)); + } + else if (regNum < 28) + { + return (OBJECTREF*)*(DWORD64**)(&pRD->pCurrentContextPointers->S2 + (regNum-18)); + } + return (OBJECTREF*)*(DWORD64**)(&pRD->volatileCurrContextPointers.T3 + (regNum-28)); +#endif +} + +bool GcInfoDecoder::IsScratchRegister(int regNum, PREGDISPLAY pRD) +{ + _ASSERTE(regNum >= 0 && regNum <= 31); + + return (regNum >= 5 && regNum <= 7) || (regNum >= 10 and regNum <= 17) || regNum >= 28 || regNum == 1; +} + +bool GcInfoDecoder::IsScratchStackSlot(INT32 spOffset, GcStackSlotBase spBase, PREGDISPLAY pRD) +{ +#ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA + _ASSERTE( m_Flags & DECODE_GC_LIFETIMES ); + + TADDR pSlot = (TADDR) GetStackSlot(spOffset, spBase, pRD); + _ASSERTE(pSlot >= pRD->SP); + + return (pSlot < pRD->SP + m_SizeOfStackOutgoingAndScratchArea); +#else + return FALSE; +#endif +} + +void GcInfoDecoder::ReportRegisterToGC( + int regNum, + unsigned gcFlags, + PREGDISPLAY pRD, + unsigned flags, + GCEnumCallback pCallBack, + void * hCallBack) +{ + GCINFODECODER_CONTRACT; + + _ASSERTE(regNum > 0 && regNum <= 31); + + LOG((LF_GCROOTS, LL_INFO1000, "Reporting " FMT_REG, regNum )); + + OBJECTREF* pObjRef = GetRegisterSlot( regNum, pRD ); +#if defined(TARGET_UNIX) && !defined(FEATURE_NATIVEAOT) && !defined(SOS_TARGET_AMD64) + + // On PAL, we don't always have the context pointers available due to + // a limitation of an unwinding library. In such case, the context + // pointers for some nonvolatile registers are NULL. + // In such case, we let the pObjRef point to the captured register + // value in the context and pin the object itself. + if (pObjRef == NULL) + { + // Report a pinned object to GC only in the promotion phase when the + // GC is scanning roots. 
+    GCCONTEXT* pGCCtx = (GCCONTEXT*)(hCallBack); + if (!pGCCtx->sc->promotion) + { + return; + } + + pObjRef = GetCapturedRegister(regNum, pRD); + + gcFlags |= GC_CALL_PINNED; + } +#endif // TARGET_UNIX && !SOS_TARGET_AMD64 + +#ifdef _DEBUG + if(IsScratchRegister(regNum, pRD)) + { + // Scratch registers cannot be reported for non-leaf frames + _ASSERTE(flags & ActiveStackFrame); + } +#endif // _DEBUG + + LOG((LF_GCROOTS, LL_INFO1000, /* Part Two */ + "at" FMT_ADDR "as ", DBG_ADDR(pObjRef) )); + + VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef); + + LOG_PIPTR(pObjRef, gcFlags, hCallBack); + + gcFlags |= CHECK_APP_DOMAIN; + + pCallBack(hCallBack, pObjRef, gcFlags DAC_ARG(DacSlotLocation(regNum, 0, false))); +} + #else // Unknown platform OBJECTREF* GcInfoDecoder::GetRegisterSlot( @@ -1980,6 +2118,8 @@ int GcInfoDecoder::GetStackReg(int spBase) int esp = 31; #elif defined(TARGET_LOONGARCH64) int esp = 3; +#elif defined(TARGET_RISCV64) + int esp = 2; #endif if( GC_SP_REL == spBase ) diff --git a/src/coreclr/vm/interpreter.cpp b/src/coreclr/vm/interpreter.cpp index 3eafbc0e7557e3..6938820a7c759e 100644 --- a/src/coreclr/vm/interpreter.cpp +++ b/src/coreclr/vm/interpreter.cpp @@ -91,7 +91,7 @@ InterpreterMethodInfo::InterpreterMethodInfo(CEEInfo* comp, CORINFO_METHOD_INFO* } #endif -#if defined(UNIX_AMD64_ABI) || defined(HOST_LOONGARCH64) +#if defined(UNIX_AMD64_ABI) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) // ...or it fits into two registers. if (hasRetBuff && getClassSize(methInfo->args.retTypeClass) <= 2 * sizeof(void*)) { @@ -537,6 +537,9 @@ void Interpreter::ArgState::AddArg(unsigned canonIndex, short numSlots, bool noR #elif defined(HOST_LOONGARCH64) callerArgStackSlots += numSlots; ClrSafeInt<short> offset(-callerArgStackSlots); +#elif defined(HOST_RISCV64) + callerArgStackSlots += numSlots; + ClrSafeInt<short> offset(-callerArgStackSlots); #endif offset *= static_cast<short>(sizeof(void*)); _ASSERTE(!offset.IsOverflow()); @@ -700,7 +703,17 @@ void Interpreter::ArgState::AddFPArg(unsigned canonIndex, unsigned short numSlot fpArgsUsed |= (0x1 << (numFPRegArgSlots + i)); } numFPRegArgSlots += numSlots; +#elif defined(HOST_RISCV64) + assert(numFPRegArgSlots + numSlots <= MaxNumFPRegArgSlots); + assert(!twoSlotAlign); + argIsReg[canonIndex] = ARS_FloatReg; + argOffsets[canonIndex] = numFPRegArgSlots * sizeof(void*); + for (unsigned i = 0; i < numSlots; i++) + { + fpArgsUsed |= (0x1 << (numFPRegArgSlots + i)); + } + numFPRegArgSlots += numSlots; #else #error "Unsupported architecture" #endif @@ -1125,7 +1138,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, #elif defined(HOST_ARM) // LONGS have 2-reg alignment; inc reg if necessary.
argState.AddArg(k, 2, /*noReg*/false, /*twoSlotAlign*/true); -#elif defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +#elif defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) argState.AddArg(k); #else #error unknown platform @@ -1138,7 +1151,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, argState.AddArg(k, 1, /*noReg*/true); #elif defined(HOST_ARM) argState.AddFPArg(k, 1, /*twoSlotAlign*/false); -#elif defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +#elif defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) argState.AddFPArg(k, 1, false); #else #error unknown platform @@ -1151,7 +1164,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, argState.AddArg(k, 2, /*noReg*/true); #elif defined(HOST_ARM) argState.AddFPArg(k, 2, /*twoSlotAlign*/true); -#elif defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +#elif defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) argState.AddFPArg(k, 1, false); #else #error unknown platform @@ -1194,6 +1207,8 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, } #elif defined(HOST_LOONGARCH64) argState.AddArg(k, static_cast<short>(szSlots)); +#elif defined(HOST_RISCV64) + argState.AddArg(k, static_cast<short>(szSlots)); #else #error unknown platform #endif @@ -1250,6 +1265,9 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, // See StubLinkerCPU::EmitProlog for the layout of the stack unsigned intRegArgBaseOffset = (argState.numFPRegArgSlots) * sizeof(void*); unsigned short stackArgBaseOffset = (unsigned short) ((argState.numRegArgs + argState.numFPRegArgSlots) * sizeof(void*)); +#elif defined(HOST_RISCV64) + unsigned intRegArgBaseOffset = (argState.numFPRegArgSlots) * sizeof(void*); + unsigned short stackArgBaseOffset = (unsigned short) ((argState.numRegArgs + argState.numFPRegArgSlots) * sizeof(void*)); #else #error unsupported platform #endif @@ -1300,6 +1318,8 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, argState.argOffsets[k] = (regArgsFound - 1) * sizeof(void*); #elif defined(HOST_LOONGARCH64) argState.argOffsets[k] += intRegArgBaseOffset; +#elif defined(HOST_RISCV64) + argState.argOffsets[k] += intRegArgBaseOffset; #else #error unsupported platform #endif @@ -1614,7 +1634,8 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp, #elif defined(HOST_LOONGARCH64) assert(!"unimplemented on LOONGARCH yet"); - +#elif defined(HOST_RISCV64) + assert(!"unimplemented on RISCV64 yet"); #else #error unsupported platform #endif @@ -6308,6 +6329,9 @@ void Interpreter::MkRefany() #elif defined(HOST_LOONGARCH64) tbr = NULL; NYI_INTERP("Unimplemented code: MkRefAny on LOONGARCH"); +#elif defined(HOST_RISCV64) + tbr = NULL; + NYI_INTERP("Unimplemented code: MkRefAny on RISCV64"); #else #error "unsupported platform" #endif @@ -9446,6 +9470,8 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T unsigned totalArgSlots = nSlots; #elif defined(HOST_LOONGARCH64) unsigned totalArgSlots = nSlots; +#elif defined(HOST_RISCV64) + unsigned totalArgSlots = nSlots; #else #error "unsupported platform" #endif diff --git a/src/coreclr/vm/interpreter.h b/src/coreclr/vm/interpreter.h index c76c4160c80cd4..51a82a6da047af 100644 --- a/src/coreclr/vm/interpreter.h +++ b/src/coreclr/vm/interpreter.h @@ -996,6 +996,8 @@ class Interpreter #endif #elif
defined(HOST_LOONGARCH64) static const int MaxNumFPRegArgSlots = 8; +#elif defined(HOST_RISCV64) + static const int MaxNumFPRegArgSlots = 8; #endif ~ArgState() @@ -2056,6 +2058,8 @@ unsigned short Interpreter::NumberOfIntegerRegArgs() { return 4; } unsigned short Interpreter::NumberOfIntegerRegArgs() { return 8; } #elif defined(HOST_LOONGARCH64) unsigned short Interpreter::NumberOfIntegerRegArgs() { return 8; } +#elif defined(HOST_RISCV64) +unsigned short Interpreter::NumberOfIntegerRegArgs() { return 8; } #else #error Unsupported architecture. #endif diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index cd0161a4aec634..f03dabfeb53ac4 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -9439,6 +9439,8 @@ uint32_t CEEInfo::getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE c #if defined(TARGET_LOONGARCH64) size = (uint32_t)MethodTable::GetLoongArch64PassStructInRegisterFlags(cls); +#elif defined(TARGET_RISCV64) + size = (uint32_t)MethodTable::GetRiscv64PassStructInRegisterFlags(cls); #endif EE_TO_JIT_TRANSITION_LEAF(); @@ -11022,6 +11024,12 @@ void reservePersonalityRoutineSpace(uint32_t &unwindSize) // The JIT passes in a 4-byte aligned block of unwind data. _ASSERTE(IS_ALIGNED(unwindSize, sizeof(ULONG))); + // Add space for personality routine, it must be 4-byte aligned. + unwindSize += sizeof(ULONG); +#elif defined(TARGET_RISCV64) + // The JIT passes in a 4-byte aligned block of unwind data. + _ASSERTE(IS_ALIGNED(unwindSize, sizeof(ULONG))); + // Add space for personality routine, it must be 4-byte aligned. unwindSize += sizeof(ULONG); #else @@ -11246,6 +11254,14 @@ void CEEJitInfo::allocUnwindInfo ( ULONG * pPersonalityRoutineRW = (ULONG*)((BYTE *)pUnwindInfoRW + ALIGN_UP(unwindSize, sizeof(ULONG))); *pPersonalityRoutineRW = ExecutionManager::GetCLRPersonalityRoutineValue(); +#elif defined(TARGET_RISCV64) + *(LONG *)pUnwindInfoRW |= (1 << 20); // X bit + + ULONG * pPersonalityRoutineRW = (ULONG*)((BYTE *)pUnwindInfoRW + ALIGN_UP(unwindSize, sizeof(ULONG))); + *pPersonalityRoutineRW = ExecutionManager::GetCLRPersonalityRoutineValue(); + #endif EE_TO_JIT_TRANSITION(); diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index cc2cccca72a096..dd9de6b0be98bb 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -401,7 +401,7 @@ extern "C" void STDCALL JIT_MemCpy(void *dest, const void *src, SIZE_T count); void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle); -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) void STDCALL JIT_StackProbe(); #endif // TARGET_ARM64 }; diff --git a/src/coreclr/vm/methodtable.cpp b/src/coreclr/vm/methodtable.cpp index 3837a9445d24f2..33dde73501e7c7 100644 --- a/src/coreclr/vm/methodtable.cpp +++ b/src/coreclr/vm/methodtable.cpp @@ -3473,6 +3473,616 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl } #endif +#if defined(TARGET_RISCV64) + +bool MethodTable::IsRiscv64OnlyOneField(MethodTable * pMT) +{ + TypeHandle th(pMT); + + bool useNativeLayout = false; + bool ret = false; + MethodTable* pMethodTable = nullptr; + + if (!th.IsTypeDesc()) + { + pMethodTable = th.AsMethodTable(); + if (pMethodTable->HasLayout()) + { + useNativeLayout = true; + } + else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/) + { + DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields(); + 
if (numIntroducedFields == 1) + { + FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw(); + + CorElementType fieldType = pFieldStart[0].GetFieldType(); + + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + ret = true; + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + pMethodTable = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable(); + if (pMethodTable->GetNumIntroducedInstanceFields() == 1) + { + ret = IsRiscv64OnlyOneField(pMethodTable); + } + } + } + goto _End_arg; + } + } + else + { + _ASSERTE(th.IsNativeValueType()); + + useNativeLayout = true; + pMethodTable = th.AsNativeValueType(); + } + _ASSERTE(pMethodTable != nullptr); + + if (useNativeLayout) + { + if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/) + { + DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields(); + FieldDesc *pFieldStart = nullptr; + + if (numIntroducedFields == 1) + { + pFieldStart = pMethodTable->GetApproxFieldDescListRaw(); + + CorElementType fieldType = pFieldStart->GetFieldType(); + + bool isFixedBuffer = (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType) + || fieldType == ELEMENT_TYPE_VALUETYPE) + && (pFieldStart->GetOffset() == 0) + && pMethodTable->HasLayout() + && (pMethodTable->GetNumInstanceFieldBytes() % pFieldStart->GetSize() == 0); + + if (isFixedBuffer) + { + numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize(); + if (numIntroducedFields != 1) + { + goto _End_arg; + } + } + + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + ret = true; + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors(); + NativeFieldCategory nfc = pNativeFieldDescs->GetCategory(); + if (nfc == NativeFieldCategory::NESTED) + { + pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable(); + ret = IsRiscv64OnlyOneField(pMethodTable); + } + else if (nfc != NativeFieldCategory::ILLEGAL) + { + ret = true; + } + } + } + else + { + ret = false; + } + } + } +_End_arg: + + return ret; +} + +int MethodTable::GetRiscv64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) +{ + TypeHandle th(cls); + + bool useNativeLayout = false; + int size = STRUCT_NO_FLOAT_FIELD; + MethodTable* pMethodTable = nullptr; + + if (!th.IsTypeDesc()) + { + pMethodTable = th.AsMethodTable(); + if (pMethodTable->HasLayout()) + { + useNativeLayout = true; + } + else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/) + { + DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields(); + + if (numIntroducedFields == 1) + { + FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw(); + + CorElementType fieldType = pFieldStart[0].GetFieldType(); + + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if (fieldType == ELEMENT_TYPE_R4) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE; + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8; + } + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + pMethodTable = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable(); + size = GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable); + } + } + else if (numIntroducedFields == 2) + { + FieldDesc *pFieldSecond; + FieldDesc *pFieldFirst = pMethodTable->GetApproxFieldDescListRaw(); + if (pFieldFirst->GetOffset() == 0) + { + pFieldSecond = pFieldFirst + 1; + } + else + { + pFieldSecond = pFieldFirst; + pFieldFirst = pFieldFirst + 1; + } + 
assert(pFieldFirst->GetOffset() == 0); + + if (pFieldFirst->GetSize() > 8) + { + goto _End_arg; + } + + CorElementType fieldType = pFieldFirst[0].GetFieldType(); + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if (fieldType == ELEMENT_TYPE_R4) + { + size = STRUCT_FLOAT_FIELD_FIRST; + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = STRUCT_FIRST_FIELD_DOUBLE; + } + else if (pFieldFirst[0].GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_SIZE_IS8; + } + + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + pMethodTable = pFieldFirst->GetApproxFieldTypeHandleThrowing().GetMethodTable(); + if (IsRiscv64OnlyOneField(pMethodTable)) + { + size = GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable); + if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + size = pFieldFirst[0].GetSize() == 8 ? STRUCT_FIRST_FIELD_DOUBLE : STRUCT_FLOAT_FIELD_FIRST; + } + else if (size == STRUCT_NO_FLOAT_FIELD) + { + size = pFieldFirst[0].GetSize() == 8 ? STRUCT_FIRST_FIELD_SIZE_IS8: 0; + } + else + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + } + else + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + } + else if (pFieldFirst[0].GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_SIZE_IS8; + } + + fieldType = pFieldSecond[0].GetFieldType(); + if (pFieldSecond[0].GetSize() > 8) + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + else if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if (fieldType == ELEMENT_TYPE_R4) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND); + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE); + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldSecond[0].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + pMethodTable = pFieldSecond[0].GetApproxFieldTypeHandleThrowing().GetMethodTable(); + if (IsRiscv64OnlyOneField(pMethodTable)) + { + int size2 = GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable); + if ((size2 & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + if (pFieldSecond[0].GetSize() == 8) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE); + } + else + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND); + } + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (size2 == STRUCT_NO_FLOAT_FIELD) + { + size |= pFieldSecond[0].GetSize() == 8 ? 
STRUCT_SECOND_FIELD_SIZE_IS8 : 0; + } + else + { + size = STRUCT_NO_FLOAT_FIELD; + } + } + else + { + size = STRUCT_NO_FLOAT_FIELD; + } + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldSecond[0].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + } + + goto _End_arg; + } + } + else + { + _ASSERTE(th.IsNativeValueType()); + + useNativeLayout = true; + pMethodTable = th.AsNativeValueType(); + } + _ASSERTE(pMethodTable != nullptr); + + if (useNativeLayout) + { + if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/) + { + DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields(); + FieldDesc *pFieldStart = nullptr; + + if (numIntroducedFields == 1) + { + pFieldStart = pMethodTable->GetApproxFieldDescListRaw(); + + CorElementType fieldType = pFieldStart->GetFieldType(); + + bool isFixedBuffer = (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType) + || fieldType == ELEMENT_TYPE_VALUETYPE) + && (pFieldStart->GetOffset() == 0) + && pMethodTable->HasLayout() + && (pMethodTable->GetNumInstanceFieldBytes() % pFieldStart->GetSize() == 0); + + if (isFixedBuffer) + { + numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize(); + if (numIntroducedFields > 2) + { + goto _End_arg; + } + + if (fieldType == ELEMENT_TYPE_R4) + { + if (numIntroducedFields == 1) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE; + } + else if (numIntroducedFields == 2) + { + size = STRUCT_FLOAT_FIELD_ONLY_TWO; + } + goto _End_arg; + } + else if (fieldType == ELEMENT_TYPE_R8) + { + if (numIntroducedFields == 1) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8; + } + else if (numIntroducedFields == 2) + { + size = STRUCT_FIELD_TWO_DOUBLES; + } + goto _End_arg; + } + } + + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if (fieldType == ELEMENT_TYPE_R4) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE; + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8; + } + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors(); + NativeFieldCategory nfc = pNativeFieldDescs->GetCategory(); + if (nfc == NativeFieldCategory::NESTED) + { + pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable(); + size = GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable); + return size; + } + else if (nfc == NativeFieldCategory::FLOAT) + { + if (pFieldStart->GetSize() == 4) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE; + } + else if (pFieldStart->GetSize() == 8) + { + size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8; + } + } + } + } + else if (numIntroducedFields == 2) + { + pFieldStart = pMethodTable->GetApproxFieldDescListRaw(); + + if (pFieldStart->GetSize() > 8) + { + goto _End_arg; + } + + if (pFieldStart->GetOffset() || !pFieldStart[1].GetOffset() || (pFieldStart[0].GetSize() > pFieldStart[1].GetOffset())) + { + goto _End_arg; + } + + CorElementType fieldType = pFieldStart[0].GetFieldType(); + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if (fieldType == ELEMENT_TYPE_R4) + { + size = STRUCT_FLOAT_FIELD_FIRST; + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = STRUCT_FIRST_FIELD_DOUBLE; + } + else if (pFieldStart[0].GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_SIZE_IS8; + } + + fieldType = pFieldStart[1].GetFieldType(); + if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if 
(fieldType == ELEMENT_TYPE_R4) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND); + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE); + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldStart[1].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + goto _End_arg; + } + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors(); + + NativeFieldCategory nfc = pNativeFieldDescs->GetCategory(); + + if (nfc == NativeFieldCategory::NESTED) + { + if (pNativeFieldDescs->GetNumElements() != 1) + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + + MethodTable* pMethodTable2 = pNativeFieldDescs->GetNestedNativeMethodTable(); + + if (!IsRiscv64OnlyOneField(pMethodTable2)) + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + + size = GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable2); + if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + if (pFieldStart->GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_DOUBLE; + } + else + { + size = STRUCT_FLOAT_FIELD_FIRST; + } + } + else if (pFieldStart->GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_SIZE_IS8; + } + else + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + } + else if (nfc == NativeFieldCategory::FLOAT) + { + if (pFieldStart[0].GetSize() == 4) + { + size = STRUCT_FLOAT_FIELD_FIRST; + } + else if (pFieldStart[0].GetSize() == 8) + { + _ASSERTE(pMethodTable->GetNativeSize() == 8); + size = STRUCT_FIRST_FIELD_DOUBLE; + } + } + else if (pFieldStart[0].GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_SIZE_IS8; + } + } + else if (pFieldStart[0].GetSize() == 8) + { + size = STRUCT_FIRST_FIELD_SIZE_IS8; + } + + fieldType = pFieldStart[1].GetFieldType(); + if (pFieldStart[1].GetSize() > 8) + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + else if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType)) + { + if (fieldType == ELEMENT_TYPE_R4) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND); + } + else if (fieldType == ELEMENT_TYPE_R8) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE); + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldStart[1].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + } + else if (fieldType == ELEMENT_TYPE_VALUETYPE) + { + const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors(); + NativeFieldCategory nfc = pNativeFieldDescs[1].GetCategory(); + + if (nfc == NativeFieldCategory::NESTED) + { + if (pNativeFieldDescs[1].GetNumElements() != 1) + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + + MethodTable* pMethodTable2 = pNativeFieldDescs[1].GetNestedNativeMethodTable(); + + if (!IsRiscv64OnlyOneField(pMethodTable2)) + { + size = STRUCT_NO_FLOAT_FIELD; + goto _End_arg; + } + + if ((GetRiscv64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable2) & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) + { + if (pFieldStart[1].GetSize() == 4) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? 
(size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND); + } + else if (pFieldStart[1].GetSize() == 8) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE); + } + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldStart[1].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + } + else if (nfc == NativeFieldCategory::FLOAT) + { + if (pFieldStart[1].GetSize() == 4) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND); + } + else if (pFieldStart[1].GetSize() == 8) + { + size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE); + } + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldStart[1].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + } + else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0) + { + size = STRUCT_NO_FLOAT_FIELD; + } + else if (pFieldStart[1].GetSize() == 8) + { + size |= STRUCT_SECOND_FIELD_SIZE_IS8; + } + } + } + } +_End_arg: + + return size; +} +#endif + #if !defined(DACCESS_COMPILE) //========================================================================================== void MethodTable::AllocateRegularStaticBoxes() diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h index 2a12d9aee125c2..c5ffcc68f71082 100644 --- a/src/coreclr/vm/methodtable.h +++ b/src/coreclr/vm/methodtable.h @@ -752,6 +752,11 @@ class MethodTable static int GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE clh); #endif +#if defined(TARGET_RISCV64) + static bool IsRiscv64OnlyOneField(MethodTable * pMT); + static int GetRiscv64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE clh); +#endif + #if defined(UNIX_AMD64_ABI_ITF) // Builds the internal data structures and classifies struct eightbytes for Amd System V calling convention. 
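
(Note on the RISC-V struct classification added above: GetRiscv64PassStructInRegisterFlags follows the LoongArch64 helper. For a struct of at most 16 bytes it inspects up to two fields and returns a bitmask saying which halves are floating-point and whether each half is 8 bytes wide; the numeric values of those flags are pinned down by the CallDescrData__flag* asserts in asmconstants.h later in this diff. Below is a minimal sketch of how such a bitmask could be mapped to argument registers under the RISC-V LP64D calling convention. The function name CountRegsForStruct is hypothetical and not part of this change; only the STRUCT_* flag names come from the sources.)

static void CountRegsForStruct(int flags, int* fpRegs, int* intRegs)
{
    *fpRegs = 0;
    *intRegs = 0;
    if (flags == STRUCT_NO_FLOAT_FIELD)
    {
        return; // no FP halves: the struct is passed like an integer of its size
    }
    if (flags & STRUCT_FLOAT_FIELD_ONLY_ONE)
    {
        *fpRegs = 1; // single float/double field -> one FP register
    }
    else if (flags & STRUCT_FLOAT_FIELD_ONLY_TWO)
    {
        *fpRegs = 2; // both halves are FP -> two FP registers
    }
    else if (flags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND))
    {
        *fpRegs = 1;  // one half is FP ...
        *intRegs = 1; // ... the other half goes in an integer register
    }
    // STRUCT_FIRST_FIELD_SIZE_IS8 / STRUCT_SECOND_FIELD_SIZE_IS8 only record whether
    // each half is 4 or 8 bytes wide; they do not change the register count.
}
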
bool ClassifyEightBytes(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct, bool isNativeStruct); diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index c997af5f678818..6049a39d0e3c5f 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -610,6 +610,8 @@ void StubPrecode::StaticInitialize() #endif #ifdef TARGET_LOONGARCH64 _ASSERTE(((*((short*)PCODEToPINSTR((PCODE)StubPrecodeCode) + OFFSETOF_PRECODE_TYPE)) >> 5) == StubPrecode::Type); +#elif TARGET_RISCV64 + _ASSERTE((*((BYTE*)PCODEToPINSTR((PCODE)StubPrecodeCode) + OFFSETOF_PRECODE_TYPE)) == StubPrecode::Type); #else _ASSERTE((*((BYTE*)PCODEToPINSTR((PCODE)StubPrecodeCode) + OFFSETOF_PRECODE_TYPE)) == StubPrecode::Type); #endif @@ -722,6 +724,8 @@ void FixupPrecode::StaticInitialize() #endif #ifdef TARGET_LOONGARCH64 _ASSERTE(((*((short*)PCODEToPINSTR((PCODE)StubPrecodeCode) + OFFSETOF_PRECODE_TYPE)) >> 5) == StubPrecode::Type); +#elif TARGET_RISCV64 + _ASSERTE((*((BYTE*)PCODEToPINSTR((PCODE)FixupPrecodeCode) + OFFSETOF_PRECODE_TYPE)) == FixupPrecode::Type); #else _ASSERTE(*((BYTE*)PCODEToPINSTR((PCODE)FixupPrecodeCode) + OFFSETOF_PRECODE_TYPE) == FixupPrecode::Type); #endif diff --git a/src/coreclr/vm/precode.h b/src/coreclr/vm/precode.h index 4822ccfec7054d..7862caccfd9f27 100644 --- a/src/coreclr/vm/precode.h +++ b/src/coreclr/vm/precode.h @@ -44,6 +44,11 @@ EXTERN_C VOID STDCALL PrecodeRemotingThunk(); #define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN #define OFFSETOF_PRECODE_TYPE 0 +#elif defined(HOST_RISCV64) + +#define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN +#define OFFSETOF_PRECODE_TYPE 0 + #endif // HOST_AMD64 #ifndef DACCESS_COMPILE @@ -63,6 +68,8 @@ struct InvalidPrecode static const int Type = 0; #elif defined(HOST_LOONGARCH64) static const int Type = 0xff; +#elif defined(HOST_RISCV64) + static const int Type = 0xff; #endif }; @@ -98,6 +105,9 @@ struct StubPrecode #elif defined(HOST_LOONGARCH64) static const int Type = 0x4; static const int CodeSize = 24; +#elif defined(HOST_RISCV64) + static const int Type = 0x17; + static const int CodeSize = 24; #endif // HOST_AMD64 BYTE m_code[CodeSize]; @@ -234,6 +244,10 @@ struct FixupPrecode static const int Type = 0x3; static const int CodeSize = 32; static const int FixupCodeOffset = 12; +#elif defined(HOST_RISCV64) + static const int Type = 0x97; + static const int CodeSize = 32; + static const int FixupCodeOffset = 10; #endif // HOST_AMD64 BYTE m_code[CodeSize]; @@ -422,10 +436,13 @@ class Precode { #ifdef OFFSETOF_PRECODE_TYPE -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_LOONGARCH64) assert(0 == OFFSETOF_PRECODE_TYPE); short type = *((short*)m_data); type >>= 5; +#elif defined(TARGET_RISCV64) + assert(0 == OFFSETOF_PRECODE_TYPE); + BYTE type = *((BYTE*)m_data + OFFSETOF_PRECODE_TYPE); #else BYTE type = m_data[OFFSETOF_PRECODE_TYPE]; #endif diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index 4b4373ac40e818..afc658961517bf 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -2303,7 +2303,7 @@ PCODE TheVarargNDirectStub(BOOL hasRetBuffArg) { LIMITED_METHOD_CONTRACT; -#if !defined(TARGET_X86) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_X86) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) if (hasRetBuffArg) { return GetEEFuncEntryPoint(VarargPInvokeStub_RetBuffArg); diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 
9a0cdd4e406d37..12aedda2db5f87 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -1,4 +1,253 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// asmconstants.h - +// +// This header defines field offsets and constants used by assembly code +// Be sure to rebuild clr/src/vm/ceemain.cpp after changing this file, to +// ensure that the constants match the expected C/C++ values -#error "TODO-RISCV64: missing implementation" +#include "../../inc/switches.h" + +//----------------------------------------------------------------------------- + +#ifndef ASMCONSTANTS_C_ASSERT +#define ASMCONSTANTS_C_ASSERT(cond) +#endif + +#ifndef ASMCONSTANTS_RUNTIME_ASSERT +#define ASMCONSTANTS_RUNTIME_ASSERT(cond) +#endif + +// Some constants are different in _DEBUG builds. This macro factors out ifdefs from below. +#ifdef _DEBUG +#define DBG_FRE(dbg,fre) dbg +#else +#define DBG_FRE(dbg,fre) fre +#endif + +#define DynamicHelperFrameFlags_Default 0 +#define DynamicHelperFrameFlags_ObjectArg 1 +#define DynamicHelperFrameFlags_ObjectArg2 2 + +#define Thread__m_fPreemptiveGCDisabled 0x0C +#define Thread__m_pFrame 0x10 + +ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled)); +ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame)); + +#define Thread_m_pFrame Thread__m_pFrame +#define Thread_m_fPreemptiveGCDisabled Thread__m_fPreemptiveGCDisabled + +#define METHODDESC_REGISTER t2 + +#define SIZEOF__ArgumentRegisters 0x40 +ASMCONSTANTS_C_ASSERT(SIZEOF__ArgumentRegisters == sizeof(ArgumentRegisters)) + +// 8*8=0x40, fa0-fa7 +#define SIZEOF__FloatArgumentRegisters 0x40 +ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegisters)) + +#define ASM_ENREGISTERED_RETURNTYPE_MAXSIZE 0x10 +ASMCONSTANTS_C_ASSERT(ASM_ENREGISTERED_RETURNTYPE_MAXSIZE == ENREGISTERED_RETURNTYPE_MAXSIZE) + +#define CallDescrData__pSrc 0x00 +#define CallDescrData__numStackSlots 0x08 +#define CallDescrData__pArgumentRegisters 0x10 +#define CallDescrData__pFloatArgumentRegisters 0x18 +#define CallDescrData__fpReturnSize 0x20 +#define CallDescrData__pTarget 0x28 +#define CallDescrData__returnValue 0x30 + +ASMCONSTANTS_C_ASSERT(CallDescrData__pSrc == offsetof(CallDescrData, pSrc)) +ASMCONSTANTS_C_ASSERT(CallDescrData__numStackSlots == offsetof(CallDescrData, numStackSlots)) +ASMCONSTANTS_C_ASSERT(CallDescrData__pArgumentRegisters == offsetof(CallDescrData, pArgumentRegisters)) +ASMCONSTANTS_C_ASSERT(CallDescrData__pFloatArgumentRegisters == offsetof(CallDescrData, pFloatArgumentRegisters)) +ASMCONSTANTS_C_ASSERT(CallDescrData__fpReturnSize == offsetof(CallDescrData, fpReturnSize)) +ASMCONSTANTS_C_ASSERT(CallDescrData__pTarget == offsetof(CallDescrData, pTarget)) +ASMCONSTANTS_C_ASSERT(CallDescrData__returnValue == offsetof(CallDescrData, returnValue)) + +#define CallDescrData__flagOneFloat 0x1 +#define CallDescrData__flagOneDouble 0x11 +#define CallDescrData__flagFloatInt 0x2 +#define CallDescrData__flagFloatLong 0x22 +#define CallDescrData__flagDoubleInt 0x12 +#define CallDescrData__flagDoubleLong 0x32 +#define CallDescrData__flagIntFloat 0x4 +#define CallDescrData__flagIntDouble 0x24 +#define CallDescrData__flagLongFloat 0x14 +#define CallDescrData__flagLongDouble 0x34 +#define CallDescrData__flagFloatFloat 0x8 +#define CallDescrData__flagFloatDouble 0x28 +#define CallDescrData__flagDoubleFloat 0x18 +#define 
CallDescrData__flagDoubleDouble 0x38 + +ASMCONSTANTS_C_ASSERT(CallDescrData__flagOneFloat == (int)STRUCT_FLOAT_FIELD_ONLY_ONE) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagOneDouble == (int)(STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagFloatInt == (int)STRUCT_FLOAT_FIELD_FIRST) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagFloatLong == (int)(STRUCT_FLOAT_FIELD_FIRST | STRUCT_SECOND_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagDoubleInt == (int)(STRUCT_FLOAT_FIELD_FIRST | STRUCT_FIRST_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagDoubleLong == (int)(CallDescrData__flagDoubleInt | STRUCT_SECOND_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagIntFloat == (int)STRUCT_FLOAT_FIELD_SECOND) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagIntDouble == (int)(STRUCT_FLOAT_FIELD_SECOND | STRUCT_SECOND_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagLongFloat == (int)(STRUCT_FLOAT_FIELD_SECOND | STRUCT_FIRST_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagLongDouble == (int)(CallDescrData__flagLongFloat | STRUCT_SECOND_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagFloatFloat == (int)STRUCT_FLOAT_FIELD_ONLY_TWO) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagFloatDouble == (int)(STRUCT_FLOAT_FIELD_ONLY_TWO | STRUCT_SECOND_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagDoubleFloat == (int)(STRUCT_FLOAT_FIELD_ONLY_TWO | STRUCT_FIRST_FIELD_SIZE_IS8)) +ASMCONSTANTS_C_ASSERT(CallDescrData__flagDoubleDouble == (int)(CallDescrData__flagDoubleFloat | STRUCT_SECOND_FIELD_SIZE_IS8)) + +#define CORINFO_NullReferenceException_ASM 0 +ASMCONSTANTS_C_ASSERT( CORINFO_NullReferenceException_ASM + == CORINFO_NullReferenceException); + + +#define CORINFO_IndexOutOfRangeException_ASM 3 +ASMCONSTANTS_C_ASSERT( CORINFO_IndexOutOfRangeException_ASM + == CORINFO_IndexOutOfRangeException); + + +// Offset of the array containing the address of captured registers in MachState +#define MachState__captureCalleeSavedRegisters 0x0 +ASMCONSTANTS_C_ASSERT(MachState__captureCalleeSavedRegisters == offsetof(MachState, captureCalleeSavedRegisters)) + +// Offset of the array containing the address of preserved registers in MachState +#define MachState__ptrCalleeSavedRegisters 0x70 +ASMCONSTANTS_C_ASSERT(MachState__ptrCalleeSavedRegisters == offsetof(MachState, ptrCalleeSavedRegisters)) + +#define MachState__isValid 0xf0 +ASMCONSTANTS_C_ASSERT(MachState__isValid == offsetof(MachState, _isValid)) + +#define LazyMachState_captureCalleeSavedRegisters MachState__captureCalleeSavedRegisters +ASMCONSTANTS_C_ASSERT(LazyMachState_captureCalleeSavedRegisters == offsetof(LazyMachState, captureCalleeSavedRegisters)) + +#define LazyMachState_captureSp (MachState__isValid+8) // padding for alignment +ASMCONSTANTS_C_ASSERT(LazyMachState_captureSp == offsetof(LazyMachState, captureSp)) + +#define LazyMachState_captureIp (LazyMachState_captureSp+8) +ASMCONSTANTS_C_ASSERT(LazyMachState_captureIp == offsetof(LazyMachState, captureIp)) + +#define VASigCookie__pNDirectILStub 0x8 +ASMCONSTANTS_C_ASSERT(VASigCookie__pNDirectILStub == offsetof(VASigCookie, pNDirectILStub)) + +#define DelegateObject___methodPtr 0x18 +ASMCONSTANTS_C_ASSERT(DelegateObject___methodPtr == offsetof(DelegateObject, _methodPtr)); + +#define DelegateObject___target 0x08 +ASMCONSTANTS_C_ASSERT(DelegateObject___target == offsetof(DelegateObject, _target)); + +#define SIZEOF__GSCookie 0x8 +ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); + +#define 
SIZEOF__Frame 0x10 +ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); + +#define SIZEOF__CONTEXT 0x220 +ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); + + +//========================================= +#define MethodTable__m_dwFlags 0x0 +ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); + +#define MethodTable__m_BaseSize 0x04 +ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize)); + +#define MethodTable__m_ElementType DBG_FRE(0x38, 0x30) +ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_pMultipurposeSlot1)); + +#define ArrayBase__m_NumComponents 0x8 +ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); + +#define PtrArray__m_Array 0x10 +ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); + +#define TypeHandle_CanCast 0x1 // TypeHandle::CanCast + +//========================================= + + + +#ifdef FEATURE_COMINTEROP + +#define SIZEOF__ComMethodFrame 0x70 +ASMCONSTANTS_C_ASSERT(SIZEOF__ComMethodFrame == sizeof(ComMethodFrame)); + +#define UnmanagedToManagedFrame__m_pvDatum 0x10 +ASMCONSTANTS_C_ASSERT(UnmanagedToManagedFrame__m_pvDatum == offsetof(UnmanagedToManagedFrame, m_pvDatum)); + +#endif // FEATURE_COMINTEROP + + +#define REDIRECTSTUB_SP_OFFSET_CONTEXT 0 + +#define CONTEXT_Pc 0x108 +ASMCONSTANTS_C_ASSERT(CONTEXT_Pc == offsetof(T_CONTEXT,Pc)) + +#define SIZEOF__FaultingExceptionFrame (SIZEOF__Frame + 0x10 + SIZEOF__CONTEXT) +#define FaultingExceptionFrame__m_fFilterExecuted SIZEOF__Frame +ASMCONSTANTS_C_ASSERT(SIZEOF__FaultingExceptionFrame == sizeof(FaultingExceptionFrame)); +ASMCONSTANTS_C_ASSERT(FaultingExceptionFrame__m_fFilterExecuted == offsetof(FaultingExceptionFrame, m_fFilterExecuted)); + +#define SIZEOF__FixupPrecode 40 +#define MethodDesc_ALIGNMENT_SHIFT 3 + +ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); + +#define ResolveCacheElem__pMT 0x00 +#define ResolveCacheElem__token 0x08 +#define ResolveCacheElem__target 0x10 +#define ResolveCacheElem__pNext 0x18 +ASMCONSTANTS_C_ASSERT(ResolveCacheElem__target == offsetof(ResolveCacheElem, target)); +ASMCONSTANTS_C_ASSERT(ResolveCacheElem__pNext == offsetof(ResolveCacheElem, pNext)); + +#define DomainLocalModule__m_pDataBlob 0x30 +#define DomainLocalModule__m_pGCStatics 0x20 +ASMCONSTANTS_C_ASSERT(DomainLocalModule__m_pDataBlob == offsetof(DomainLocalModule, m_pDataBlob)); +ASMCONSTANTS_C_ASSERT(DomainLocalModule__m_pGCStatics == offsetof(DomainLocalModule, m_pGCStatics)); + + +// For JIT_PInvokeBegin and JIT_PInvokeEnd helpers +#define Frame__m_Next 0x08 +ASMCONSTANTS_C_ASSERT(Frame__m_Next == offsetof(Frame, m_Next)) + +#define InlinedCallFrame__m_Datum 0x10 +ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_Datum == offsetof(InlinedCallFrame, m_Datum)) + +#define InlinedCallFrame__m_pCallSiteSP 0x20 +ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pCallSiteSP == offsetof(InlinedCallFrame, m_pCallSiteSP)) + +#define InlinedCallFrame__m_pCallerReturnAddress 0x28 +ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pCallerReturnAddress == offsetof(InlinedCallFrame, m_pCallerReturnAddress)) + +#define InlinedCallFrame__m_pCalleeSavedFP 0x30 +ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pCalleeSavedFP == offsetof(InlinedCallFrame, m_pCalleeSavedFP)) + +#define InlinedCallFrame__m_pThread 0x38 +ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pThread == offsetof(InlinedCallFrame, m_pThread)) + +#define FixupPrecodeData__Target 0x00 
+ASMCONSTANTS_C_ASSERT(FixupPrecodeData__Target == offsetof(FixupPrecodeData, Target)) + +#define FixupPrecodeData__MethodDesc 0x08 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__MethodDesc == offsetof(FixupPrecodeData, MethodDesc)) + +#define FixupPrecodeData__PrecodeFixupThunk 0x10 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__PrecodeFixupThunk == offsetof(FixupPrecodeData, PrecodeFixupThunk)) + +#define StubPrecodeData__Target 0x08 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecodeData, Target)) + +#define StubPrecodeData__MethodDesc 0x00 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__MethodDesc == offsetof(StubPrecodeData, MethodDesc)) + +#define CallCountingStubData__RemainingCallCountCell 0x00 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__RemainingCallCountCell == offsetof(CallCountingStubData, RemainingCallCountCell)) + +#define CallCountingStubData__TargetForMethod 0x08 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCountingStubData, TargetForMethod)) + +#define CallCountingStubData__TargetForThresholdReached 0x10 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) + +#undef ASMCONSTANTS_RUNTIME_ASSERT +#undef ASMCONSTANTS_C_ASSERT diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 3515f38c8120d7..cc063af47a2ae6 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -1,7 +1,955 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. #include "asmconstants.h" #include "unixasmmacros.inc" -#error "TODO-RISCV64: missing implementation" +LEAF_ENTRY GetCurrentIP, _TEXT + addi a0, ra, 0 + jalr x0, ra, 0 +LEAF_END GetCurrentIP, _TEXT + +// LPVOID __stdcall GetCurrentSP(void)// +LEAF_ENTRY GetCurrentSP, _TEXT + addi a0, sp, 0 + jalr x0, ra, 0 +LEAF_END GetCurrentSP, _TEXT + +//----------------------------------------------------------------------------- +// The following Macros help in WRITE_BARRIER Implementations +// WRITE_BARRIER_ENTRY +// +// Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way +// to declare a write barrier function. +// +.macro WRITE_BARRIER_ENTRY name + LEAF_ENTRY \name, _TEXT +.endm + +// WRITE_BARRIER_END +// +// The partner to WRITE_BARRIER_ENTRY, used like NESTED_END. +// +.macro WRITE_BARRIER_END name + LEAF_END_MARKED \name, _TEXT +.endm + +// void JIT_UpdateWriteBarrierState(bool skipEphemeralCheck, size_t writeableOffset) +// +// Update shadow copies of the various state info required for barrier +// +// State info is contained in a literal pool at the end of the function +// Placed in text section so that it is close enough to use ldr literal and still +// be relocatable. Eliminates need for PREPARE_EXTERNAL_VAR in hot code.
+// +// Align and group state info together so it fits in a single cache line +// and each entry can be written atomically +// +WRITE_BARRIER_ENTRY JIT_UpdateWriteBarrierState + // a0-a7 will contain intended new state + // t0 will preserve skipEphemeralCheck + // t2 will be used for pointers + + addi t0, a0, 0 + addi t1, a1, 0 + + lla a0, g_card_table + ld a0, 0(a0) + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + lla a1, g_card_bundle_table + ld a1, 0(a1) +#endif + +#ifdef WRITE_BARRIER_CHECK + lla a2, g_GCShadow + ld a2, 0(a2) +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + lla a3, g_sw_ww_table + ld a3, 0(a3) +#endif + + lla a4, g_ephemeral_low + ld a4, 0(a4) + + lla a5, g_ephemeral_high + ld a5, 0(a5) + + beq t0, zero, LOCAL_LABEL(EphemeralCheckEnabled) + + ori a4, zero, 0 + addi a5, zero, -1 +LOCAL_LABEL(EphemeralCheckEnabled): + + lla a6, g_lowest_address + ld a6, 0(a6) + + lla a7, g_highest_address + ld a7, 0(a7) + + // Update wbs state + lla t2, JIT_WriteBarrier_Table_Loc + ld t2, 0(t2) + add t2, t2, t1 + + sd a0, 0(t2) + sd a1, 8(t2) + sd a2, 16(t2) + sd a3, 24(t2) + sd a4, 32(t2) + sd a5, 40(t2) + sd a6, 48(t2) + sd a7, 56(t2) + + EPILOG_RETURN + +WRITE_BARRIER_END JIT_UpdateWriteBarrierState + +// ---------------------------------------------------------------------------------------- +// __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + // Setup args for JIT_WriteBarrier. a0 = dst ; a1 = val + addi t3, a0, 0 // t3 = dst + addi t4, a1, 0 // t4 = val + + // Branch to the write barrier + lla t1, JIT_WriteBarrier_Loc + ld t1, 0(t1) + jalr x0, t1, 0 +LEAF_END JIT_WriteBarrier_Callable, _TEXT + + +.balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line +// ------------------------------------------------------------------ +// Start of the writeable code region +LEAF_ENTRY JIT_PatchedCodeStart, _TEXT + jalr x0, ra, 0 +LEAF_END JIT_PatchedCodeStart, _TEXT + +// void JIT_ByRefWriteBarrier +// +// On entry: +// t5 : the source address (points to object reference to write) +// t3 : the destination address (object reference written here) +// +// On exit: +// t5 : incremented by 8 +// t4 : trashed +// + +// void JIT_ByRefWriteBarrier +WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier + ld t4, 0(t5) + addi t5, t5, 8 + tail C_FUNC(JIT_CheckedWriteBarrier) +WRITE_BARRIER_END JIT_ByRefWriteBarrier + +//----------------------------------------------------------------------------- +// Simple WriteBarriers +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +// +// On entry: +// t3 : the destination address (LHS of the assignment) +// t4 : the object reference (RHS of the assignment) +// +// On exit: +// t1 : trashed +// t0 : trashed +// t6 : trashed +// t3 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) +// + +WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier + lla t6, wbs_lowest_address + ld t6, 0(t6) + slt t6, t3, t6 + + lla t1, wbs_highest_address + ld t1, 0(t1) + slt t0, t1, t3 + or t6, t0, t6 + beq t6, zero, C_FUNC(JIT_WriteBarrier) + + sd t4, 0(t3) + addi t3, t3, 8 + jalr x0, ra, 0 +WRITE_BARRIER_END JIT_CheckedWriteBarrier + +// void JIT_WriteBarrier(Object** dst, Object* src) +// On entry: +// t3 : the destination address (LHS of the assignment) +// t4 : the object reference (RHS of the assignment) +// +// On exit: +// t0 : trashed +// t1 : trashed +// t6 : trashed +// t4 : trashed +// t3 : trashed
(incremented by 8 to implement JIT_ByRefWriteBarrier contract) +// +WRITE_BARRIER_ENTRY JIT_WriteBarrier + + // TODO: sync_release (runtime detection required) + fence rw, rw + + sd t4, 0(t3) + +#ifdef WRITE_BARRIER_CHECK + // Update GC Shadow Heap + + // Do not perform the work if g_GCShadow is 0 + lla t1, wbs_GCShadow + ld t1, 0(t1) + + beq t1, zero, LOCAL_LABEL(ShadowUpdateDisabled) + + // Compute address of shadow heap location: + // pShadow = g_GCShadow + ($t3 - g_lowest_address) + lla t6, wbs_lowest_address + ld t6, 0(t6) + + sub t6, t3, t6 + add t0, t6, t1 + + // if (pShadow >= g_GCShadowEnd) goto end + lla t6, g_GCShadowEnd + ld t6, 0(t6) + + slt t6, t0, t6 + beq t6, zero, LOCAL_LABEL(ShadowUpdateEnd) + + // *pShadow = $t4 + sd t4, 0(t0) + + // Ensure that the write to the shadow heap occurs before the read from the GC heap so that race + // conditions are caught by INVALIDGCVALUE. + fence rw, rw + + // if (*t3 == t4) goto end + ld t6, 0(t3) + beq t6, t4, LOCAL_LABEL(ShadowUpdateEnd) + + // *pShadow = INVALIDGCVALUE (0xcccccccd) + li t6, 0xcccccccd + sd t6, 0(t0) +LOCAL_LABEL(ShadowUpdateEnd): +LOCAL_LABEL(ShadowUpdateDisabled): +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + + lla t6, wbs_sw_ww_table + ld t6, 0(t6) + beq t6, zero, LOCAL_LABEL(CheckCardTable) + + srli t4, t3, 0xc + add t6, t6, t4 // SoftwareWriteWatch::AddressToTableByteIndexShift + lb t4, 0(t6) + bne t4, zero, LOCAL_LABEL(CheckCardTable) + + ori t4, zero, 0xFF + sb t4, 0(t6) + +LOCAL_LABEL(CheckCardTable): +#endif + // Branch to Exit if the reference is not in the Gen0 heap + lla t6, wbs_ephemeral_low + ld t6, 0(t6) + beq t6, zero, LOCAL_LABEL(SkipEphemeralCheck) + + slt t0, t4, t6 + lla t6, wbs_ephemeral_high + ld t6, 0(t6) + slt t1, t6, t4 + or t0, t1, t0 + bne t0, zero, LOCAL_LABEL(Exit) + +LOCAL_LABEL(SkipEphemeralCheck): + // Check if we need to update the card table + lla t6, wbs_card_table + ld t6, 0(t6) + srli t0, t3, 11 + add t4, t6, t0 + lbu t1, 0(t4) + ori t0, zero, 0xFF + beq t1, t0, LOCAL_LABEL(Exit) + + sb t0, 0(t4) + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Check if we need to update the card bundle table + lla t6, wbs_card_bundle_table + ld t6, 0(t6) + srli t0, t3, 21 + add t4, t6, t0 + + lbu t6, 0(t4) + ori t0, zero, 0xFF + beq t6, t0, LOCAL_LABEL(Exit) + + sb t0, 0(t4) +#endif +LOCAL_LABEL(Exit): + addi t3, t3, 8 + jalr x0, ra, 0 +WRITE_BARRIER_END JIT_WriteBarrier + +// Begin patchable literal pool + .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line +WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table +wbs_begin: +wbs_card_table: + .quad 0 +wbs_card_bundle_table: + .quad 0 +wbs_GCShadow: + .quad 0 +wbs_sw_ww_table: + .quad 0 +wbs_ephemeral_low: + .quad 0 +wbs_ephemeral_high: + .quad 0 +wbs_lowest_address: + .quad 0 +wbs_highest_address: + .quad 0 +WRITE_BARRIER_END JIT_WriteBarrier_Table + +// ------------------------------------------------------------------ +// End of the writeable code region +LEAF_ENTRY JIT_PatchedCodeLast, _TEXT + jalr x0, ra, 0 +LEAF_END JIT_PatchedCodeLast, _TEXT + + +// +// If a preserved register were pushed onto the stack between +// the managed caller and the H_M_F, ptrS0_S8 will point to its +// location on the stack and it would have been updated on the +// stack by the GC already and it will be popped back into the +// appropriate register when the appropriate epilog is run. 
+// +// Otherwise, the register is preserved across all the code +// in this HCALL or FCALL, so we need to update those registers +// here because the GC will have updated our copies in the +// frame. +// +// So, if ptrS0_S8 points into the MachState, we need to update +// the register here. That's what this macro does. +// +.macro RestoreRegMS idx, reg + // Incoming: + // + // a0 = address of MachState + // + // idx: Index of the callee register + // s0/fp: 0, s1: 1, s3-s11: 4-11, gp: 12 tp: 13 + // + // reg: Register name (e.g. s0, s1, etc) + // + // Get the address of the specified captured register from machine state + addi a2, a0, (MachState__captureCalleeSavedRegisters + (\idx * 8)) + + //// Get the content of specified preserved register pointer from machine state + ld a3, (MachState__ptrCalleeSavedRegisters + (\idx * 8))(a0) + + bne a2, a3, LOCAL_LABEL(NoRestore_\reg) + + ld \reg, 0(a2) +LOCAL_LABEL(NoRestore_\reg): + +.endm + +NESTED_ENTRY ThePreStub, _TEXT, NoHandler + PROLOG_WITH_TRANSITION_BLOCK + + addi a1, METHODDESC_REGISTER, 0 // pMethodDesc + + addi a0, sp, __PWTB_TransitionBlock // pTransitionBlock + call PreStubWorker + addi t4, a0, 0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG t4 +NESTED_END ThePreStub, _TEXT + +// ------------------------------------------------------------------\ + +// EXTERN_C int __fastcall HelperMethodFrameRestoreState( +// INDEBUG_COMMA(HelperMethodFrame *pFrame) +// MachState *pState +// ) +LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT +#ifdef _DEBUG + addi a0, a1, 0 +#endif + + // If machine state is invalid, then simply exit + lw a1, MachState__isValid(a0) + beq a1, zero, LOCAL_LABEL(Done) + + // manually assign index + // s0/fp: 0, s1: 1, s3-s11: 4-11, gp: 12 tp: 13 + RestoreRegMS 0, s0 + RestoreRegMS 1, s1 + RestoreRegMS 2, s2 + RestoreRegMS 3, s3 + RestoreRegMS 4, s4 + RestoreRegMS 5, s5 + RestoreRegMS 6, s6 + RestoreRegMS 7, s7 + RestoreRegMS 8, s8 + RestoreRegMS 9, s9 + RestoreRegMS 10, s10 + RestoreRegMS 11, s11 + RestoreRegMS 12, tp + RestoreRegMS 13, gp +LOCAL_LABEL(Done): + // Its imperative that the return value of HelperMethodFrameRestoreState is zero + // as it is used in the state machine to loop until it becomes zero. + // Refer to HELPER_METHOD_FRAME_END macro for details. + addi a0, zero, 0 + jalr x0, ra, 0 +LEAF_END HelperMethodFrameRestoreState, _TEXT + +//----------------------------------------------------------------------------- +// This routine captures the machine state. It is used by helper method frame +//----------------------------------------------------------------------------- +//void LazyMachStateCaptureState(struct LazyMachState *pState)// +LEAF_ENTRY LazyMachStateCaptureState, _TEXT + // marks that this is not yet valid + sw zero, (MachState__isValid)(a0) + + sd ra, (LazyMachState_captureIp)(a0) + + // save sp register. + sd sp, (LazyMachState_captureSp)(a0) + + // save non-volatile registers that can contain object references + addi a1, a0, LazyMachState_captureCalleeSavedRegisters + + sd s0, 0(a1) + sd s1, 8(a1) + sd s2, 16(a1) + sd s3, 24(a1) + sd s4, 32(a1) + sd s5, 40(a1) + sd s6, 48(a1) + sd s7, 56(a1) + sd s8, 64(a1) + sd s9, 72(a1) + sd s10, 80(a1) + sd s11, 88(a1) + sd tp, 96(a1) + sd gp, 104(a1) + + jalr x0, ra, 0 +LEAF_END LazyMachStateCaptureState, _TEXT + +// ------------------------------------------------------------------ +// The call in ndirect import precode points to this function. 
+NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0xa0 + SAVE_ARGUMENT_REGISTERS sp, 0x20 + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0x60 + + addi a0, t2, 0 + call C_FUNC(NDirectImportWorker) + addi t4, a0, 0 + + // pop the stack and restore original register state + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0x60 + RESTORE_ARGUMENT_REGISTERS sp, 0x20 + //EPILOG_RESTORE_REG gp, 16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0xa0 + + // If we got back from NDirectImportWorker, the MD has been successfully + // linked. Proceed to execute the original DLL call. + EPILOG_BRANCH_REG t4 +NESTED_END NDirectImportThunk, _TEXT + +// void SinglecastDelegateInvokeStub(Delegate *pThis) +LEAF_ENTRY SinglecastDelegateInvokeStub, _TEXT + beq a0, zero, LOCAL_LABEL(LNullThis) + + ld t4, (DelegateObject___methodPtr)(a0) + ld a0, (DelegateObject___target)(a0) + jalr x0, t4, 0 + +LOCAL_LABEL(LNullThis): + addi a0, zero, CORINFO_NullReferenceException_ASM + tail JIT_InternalThrow +LEAF_END SinglecastDelegateInvokeStub, _TEXT + +// ------------------------------------------------------------------ +// ThePreStubPatch() +LEAF_ENTRY ThePreStubPatch, _TEXT +.globl C_FUNC(ThePreStubPatchLabel) +C_FUNC(ThePreStubPatchLabel): + jalr x0, ra, 0 +LEAF_END ThePreStubPatch, _TEXT + +NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix + // Save arguments and return address + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0xa0 + //PROLOG_SAVE_REG gp, 16 + SAVE_ARGUMENT_REGISTERS sp, 32 + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96 + + + addi a0, t2, 0 + call TheUMEntryPrestubWorker + addi t4, a0, 0 + + // pop the stack and restore original register state + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 + RESTORE_ARGUMENT_REGISTERS sp, 32 + //EPILOG_RESTORE_REG gp, 16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0xa0 + + // and tailcall to the actual method + EPILOG_BRANCH_REG t4 +NESTED_END TheUMEntryPrestub, _TEXT + +// ------------------------------------------------------------------ +// void* JIT_GetSharedGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID) + +LEAF_ENTRY JIT_GetSharedGCStaticBase_SingleAppDomain, _TEXT + // If class is not initialized, bail to C++ helper + addi a2, a0, DomainLocalModule__m_pDataBlob + add a2, a2, a1 + lb a2, 0(a2) + andi t5, a2, 1 + beq t5, zero, LOCAL_LABEL(JIT_GetSharedGCStaticBase_SingleAppDomain_CallHelper) + + ld a0, DomainLocalModule__m_pGCStatics(a0) + jalr x0, ra, 0 + +LOCAL_LABEL(JIT_GetSharedGCStaticBase_SingleAppDomain_CallHelper): + // Tail call JIT_GetSharedGCStaticBase_Helper + call JIT_GetSharedGCStaticBase_Helper +LEAF_END JIT_GetSharedGCStaticBase_SingleAppDomain, _TEXT + +// ------------------------------------------------------------------ +// ResolveWorkerChainLookupAsmStub +// +// This method will perform a quick chained lookup of the entry if the +// initial cache lookup fails. 
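+// Conceptually each ResolveCacheElem is a node of the form
+//     { MethodTable* pMT;  size_t token;  void* target;  ResolveCacheElem* pNext; }
+// (field order assumed from the 0/8/target/pNext offsets used below); the stub walks pNext until
+// it finds a node whose pMT and token both match, then jumps to that node's target.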
+// +// On Entry: +// t1 contains the pointer to the current ResolveCacheElem +// t5 contains the address of the indirection (and the flags in the low two bits) +// t2 contains our contract the DispatchToken +// Must be preserved: +// a0 contains the instance object ref that we are making an interface call on +// t1 Must point to a ResolveCacheElem [For Sanity] +// [a1-a7] contains any additional register arguments for the interface method +// +// Loaded from a0 +// t6 contains our type the MethodTable (from object ref in a0) +// +// On Exit: +// a0, [a1-a7] arguments for the interface implementation target +// +// On Exit (to ResolveWorkerAsmStub): +// t5 contains the address of the indirection and the flags in the low two bits. +// t2 contains our contract (DispatchToken) +// t4 will be trashed +// + +#define BACKPATCH_FLAG 1 +#define PROMOTE_CHAIN_FLAG 2 + +NESTED_ENTRY ResolveWorkerChainLookupAsmStub, _TEXT, NoHandler + andi t4, t5, BACKPATCH_FLAG // First we check if t5 has the BACKPATCH_FLAG set + bne t4, zero, LOCAL_LABEL(Fail) // If the BACKPATCH_FLAGS is set we will go directly to the ResolveWorkerAsmStub + + ld t6, 0(a0) // retrieve the MethodTable from the object ref in a0 +LOCAL_LABEL(MainLoop): + ld t1, (ResolveCacheElem__pNext)(t1) // t1 <= the next entry in the chain + beq t1, zero, LOCAL_LABEL(Fail) + + ld t4, 0(t1) + // compare our MT with the one in the ResolveCacheElem + bne t4, t6, LOCAL_LABEL(MainLoop) + + ld t4, 8(t1) + // compare our DispatchToken with one in the ResolveCacheElem + bne t2, t4, LOCAL_LABEL(MainLoop) + +LOCAL_LABEL(Success): + PREPARE_EXTERNAL_VAR g_dispatch_cache_chain_success_counter, t6 + ld t4, 0(t6) + addi t4, t4, -1 + sd t4, 0(t6) + blt t4, zero, LOCAL_LABEL(Promote) + + ld t4, (ResolveCacheElem__target)(t1) // get the ImplTarget + jalr x0, t4, 0 // branch to interface implementation target + +LOCAL_LABEL(Promote): + // Move this entry to head position of the chain + addi t4, zero, 256 + sd t4, 0(t6) // be quick to reset the counter so we don't get a bunch of contending threads + ori t5, t5, PROMOTE_CHAIN_FLAG // set PROMOTE_CHAIN_FLAG + addi t2, t1, 0 // We pass the ResolveCacheElem to ResolveWorkerAsmStub instead of the DispatchToken + +LOCAL_LABEL(Fail): + tail C_FUNC(ResolveWorkerAsmStub) // call the ResolveWorkerAsmStub method to transition into the VM +NESTED_END ResolveWorkerChainLookupAsmStub, _TEXT + +// ------------------------------------------------------------------ +// void ResolveWorkerAsmStub(args in regs a0-a7 & stack, t5:IndirectionCellAndFlags, t2:DispatchToken) +// +// The stub dispatch thunk which transfers control to VSD_ResolveWorker. 
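+// t5 carries the indirection cell address with the flags folded into its low two bits; the
+// srli/slli pair and the andi below split them back apart, roughly:
+//     a1 = t5 & ~3;   // indirection cell address
+//     a3 = t5 & 3;    // flags (BACKPATCH_FLAG / PROMOTE_CHAIN_FLAG)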
+NESTED_ENTRY ResolveWorkerAsmStub, _TEXT, NoHandler + PROLOG_WITH_TRANSITION_BLOCK + + addi a2, t2, 0 // DispatchToken + addi a0, sp, __PWTB_TransitionBlock // pTransitionBlock + srli a1, t5, 2 + andi a3, t5, 3 // flag + slli a1, a1, 2 + call C_FUNC(VSD_ResolveWorker) + addi t4, a0, 0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + + EPILOG_BRANCH_REG t4 +NESTED_END ResolveWorkerAsmStub, _TEXT + +// ------------------------------------------------------------------ +// void* JIT_GetSharedNonGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID) + +LEAF_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain, _TEXT + jalr x0, ra, 0 +LEAF_END JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain, _TEXT + +// ------------------------------------------------------------------ +// void* JIT_GetSharedGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID) + +LEAF_ENTRY JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain, _TEXT + ld a0, (DomainLocalModule__m_pGCStatics)(a0) + jalr x0, ra, 0 +LEAF_END JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain, _TEXT + + +#ifdef FEATURE_HIJACK +// ------------------------------------------------------------------ +// Hijack function for functions which return a scalar type or a struct (value type) +NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x88 + + // Spill callee saved registers + PROLOG_SAVE_REG_PAIR s1, s2, 16 + PROLOG_SAVE_REG_PAIR s3, s4, 32 + PROLOG_SAVE_REG_PAIR s5, s6, 48 + PROLOG_SAVE_REG_PAIR s7, s8, 64 + PROLOG_SAVE_REG_PAIR s9, s10, 80 + PROLOG_SAVE_REG s11, 96 + + // save any integral return value(s) + sd a0, 104(sp) + sd a1, 112(sp) + + // save any FP/HFA return value(s) + fsd f0, 120(sp) + fsd f1, 128(sp) + + addi a0, sp, 0 + call C_FUNC(OnHijackWorker) + + // restore callee saved registers + + // restore any integral return value(s) + ld a0, 104(sp) + ld a1, 112(sp) + + // restore any FP/HFA return value(s) + fld f0, 120(sp) + fld f1, 128(sp) + + EPILOG_RESTORE_REG_PAIR s1, s2, 16 + EPILOG_RESTORE_REG_PAIR s3, s4, 32 + EPILOG_RESTORE_REG_PAIR s5, s6, 48 + EPILOG_RESTORE_REG_PAIR s7, s8, 64 + EPILOG_RESTORE_REG_PAIR s9, s10, 80 + EPILOG_RESTORE_REG s11, 96 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x88 + EPILOG_RETURN +NESTED_END OnHijackTripThread, _TEXT + +#endif // FEATURE_HIJACK + +// ------------------------------------------------------------------ +// Redirection Stub for GC in fully interruptible method +//GenerateRedirectedHandledJITCaseStub GCThreadControl +// ------------------------------------------------------------------ +//GenerateRedirectedHandledJITCaseStub DbgThreadControl +// ------------------------------------------------------------------ +//GenerateRedirectedHandledJITCaseStub UserSuspend + +#ifdef _DEBUG +// ------------------------------------------------------------------ +// Redirection Stub for GC Stress +GenerateRedirectedHandledJITCaseStub GCStress +#endif + + +// ------------------------------------------------------------------ +// This helper enables us to call into a funclet after restoring Fp register +NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler + // On entry: + // + // a0 = throwable + // a1 = PC to invoke + // a2 = address of s0 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame + // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
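+    // a2 points at the S0 slot inside the CONTEXT record, so the remaining non-volatile registers
+    // are reloaded below at fixed offsets relative to that slot (negative offsets for gp/tp,
+    // positive offsets for s1-s11); the exact offsets are assumed to follow the PAL CONTEXT layout.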
+ // + + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 120, 0 + + // Spill callee saved registers + PROLOG_SAVE_REG_PAIR s1, s2, 16 + PROLOG_SAVE_REG_PAIR s3, s4, 32 + PROLOG_SAVE_REG_PAIR s5, s6, 48 + PROLOG_SAVE_REG_PAIR s7, s8, 64 + PROLOG_SAVE_REG_PAIR s9, s10, 80 + PROLOG_SAVE_REG_PAIR s11, gp, 96 + PROLOG_SAVE_REG tp, 112 + + // Save the SP of this function + sd sp, 0(a3) + + ld gp, (-40)(a2) // offset of tp in PCONTEXT relative to S0. + ld tp, (-32)(a2) // offset of tp in PCONTEXT relative to S0. + ld fp, 0(a2) // offset of fp in PCONTEXT relative to S0. + ld s1, 8(a2) + ld s2, 80(a2) + ld s3, 88(a2) + ld s4, 96(a2) + ld s5, 104(a2) + ld s6, 112(a2) + ld s7, 120(a2) + ld s8, 128(a2) + ld s9, 136(a2) + ld s10, 144(a2) + ld s11, 152(a2) + + // Invoke the funclet + jalr ra, a1, 0 + + EPILOG_RESTORE_REG_PAIR s1, s2, 16 + EPILOG_RESTORE_REG_PAIR s3, s4, 32 + EPILOG_RESTORE_REG_PAIR s5, s6, 48 + EPILOG_RESTORE_REG_PAIR s7, s8, 64 + EPILOG_RESTORE_REG_PAIR s9, s10, 80 + EPILOG_RESTORE_REG_PAIR s11, gp, 96 + EPILOG_RESTORE_REG tp, 112 + + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 120 + EPILOG_RETURN +NESTED_END CallEHFunclet, _TEXT + +// This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the +// frame pointer for accessing the locals in the parent method. +NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 16 + + // On entry: + // + // a0 = throwable + // a1 = SP of the caller of the method/funclet containing the filter + // a2 = PC to invoke + // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // + // Save the SP of this function + sd fp, 0(a3) + // Invoke the filter funclet + jalr ra, a2, 0 + + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 + EPILOG_RETURN +NESTED_END CallEHFilterFunclet, _TEXT + +#ifdef FEATURE_COMINTEROP +// Function used by COM interop to get floating point return value (since it's not in the same +// register(s) as non-floating point values). +// +// On entry// +// a0 : size of the FP result (4 or 8 bytes) +// a1 : pointer to 64-bit buffer to receive result +// +// On exit: +// buffer pointed to by a1 on entry contains the float or double argument as appropriate +// +LEAF_ENTRY getFPReturn, _TEXT + fsd f0, 0(a1) +LEAF_END getFPReturn, _TEXT + +// ------------------------------------------------------------------ +// Function used by COM interop to set floating point return value (since it's not in the same +// register(s) as non-floating point values). 
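+//
+// On entry (per the setFPReturn(int fpSize, INT64 retVal) declaration in cgencpu.h):
+//  a0 : size of the FP result (4 or 8 bytes)
+//  a1 : the raw result bits; the fmv.d.x below reinterprets them into the FP return register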
+// +LEAF_ENTRY setFPReturn, _TEXT + fmv.d.x f0, a1 +LEAF_END setFPReturn, _TEXT + +#endif // FEATURE_COMINTEROP + +// +// JIT Static access helpers when coreclr host specifies single appdomain flag +// + +// ------------------------------------------------------------------ +// void* JIT_GetSharedNonGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID) + +LEAF_ENTRY JIT_GetSharedNonGCStaticBase_SingleAppDomain, _TEXT + // If class is not initialized, bail to C++ helper + // dext a1, a1, 0, 32 + addi a2, a0, DomainLocalModule__m_pDataBlob + + add a2, a2, a1 + lb a2, 0(a2) + andi t4, a2, 1 + beq t4, zero, LOCAL_LABEL(JIT_GetSharedNonGCStaticBase_SingleAppDomain_CallHelper) + + jalr x0, ra, 0 + +LOCAL_LABEL(JIT_GetSharedNonGCStaticBase_SingleAppDomain_CallHelper): + // Tail call JIT_GetSharedNonGCStaticBase_Helper + tail JIT_GetSharedNonGCStaticBase_Helper +LEAF_END JIT_GetSharedNonGCStaticBase_SingleAppDomain, _TEXT + +#ifdef FEATURE_READYTORUN + +NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler +C_FUNC(DelayLoad_MethodCall): + .global C_FUNC(DelayLoad_MethodCall) + PROLOG_WITH_TRANSITION_BLOCK + + addi a1, t5, 0 // Indirection cell + addi a2, t0, 0 // sectionIndex + addi a3, t1, 0 // Module* + + addi a0, sp, __PWTB_TransitionBlock // pTransitionBlock + call C_FUNC(ExternalMethodFixupWorker) + addi t4, a0, 0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + PATCH_LABEL ExternalMethodFixupPatchLabel + EPILOG_BRANCH_REG t4 +NESTED_END DelayLoad_MethodCall_FakeProlog, _TEXT + + +.macro DynamicHelper frameFlags, suffix +NESTED_ENTRY DelayLoad_Helper\suffix\()_FakeProlog, _TEXT, NoHandler +DelayLoad_Helper\suffix: + .global DelayLoad_Helper\suffix + + PROLOG_WITH_TRANSITION_BLOCK + + //DynamicHelperWorker(TransitionBlock * pTransitionBlock, TADDR * pCell, + // DWORD sectionIndex, Module * pModule, INT frameFlags) + addi a1, t5, 0 // Indirection cell + addi a2, t0, 0 // sectionIndex + addi a3, t1, 0 // Module* + addi a4, x0, \frameFlags + + addi a0, sp, __PWTB_TransitionBlock // pTransitionBlock + call DynamicHelperWorker + + bne a0, x0, LOCAL_LABEL(FakeProlog\suffix\()_0) + + ld a0, __PWTB_ArgumentRegisters(sp) + EPILOG_WITH_TRANSITION_BLOCK_RETURN +LOCAL_LABEL(FakeProlog\suffix\()_0): + addi t4, a0, 0 + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG t4 + +NESTED_END DelayLoad_Helper\suffix\()_FakeProlog, _TEXT +.endm + +DynamicHelper DynamicHelperFrameFlags_Default +DynamicHelper DynamicHelperFrameFlags_ObjectArg, _Obj +DynamicHelper DynamicHelperFrameFlags_ObjectArg | DynamicHelperFrameFlags_ObjectArg2, _ObjObj +#endif + + +#ifdef PROFILING_SUPPORTED + +// ------------------------------------------------------------------ +LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT + jalr x0, ra, 0 +LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT + +// ------------------------------------------------------------------ +#define PROFILE_ENTER 1 +#define PROFILE_LEAVE 2 +#define PROFILE_TAILCALL 4 +#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 272 + +// ------------------------------------------------------------------ +.macro GenerateProfileHelper helper, flags +NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler + // On entry: + // t1 = functionIDOrClientID + // t2 = profiledSp + // t6 = throwable + // + // On exit: + // Values of a0-a7, fa0-fa7, fp are preserved. + // Values of other volatile registers are not preserved. + + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Allocate space and save Fp, Pc. 
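+    // The stack block assembled here is presumably consumed as a PROFILE_PLATFORM_SPECIFIC_DATA
+    // (see profiler.cpp): argument registers at 16, functionId at 88, FP argument registers at 96,
+    // then probeSp, profiledSp, hiddenArg and flags at 224, 232, 240 and 248, which are the offsets
+    // used by the stores that follow.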
+ SAVE_ARGUMENT_REGISTERS sp, 16 // Save t0 and argument registers (a0-a7). + sd zero, 88(sp) // Clear functionId. + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Save floating-point/SIMD registers (fa0-fa7). + addi t6, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper. + sd t6, 224(sp) // Save probeSp. + sd t2, 232(sp) // Save profiledSp. + + sd zero, 240(sp) // Clear hiddenArg. + addi t6, zero, \flags + sw t6, 248(sp) // Save flags. + sw zero, 252(sp) // clear unused field. + + addi a1, t1, 0 + addi a2, sp, 0 + call C_FUNC(\helper) + + RESTORE_ARGUMENT_REGISTERS sp, 16 // Restore t0 and argument registers. + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Restore floating-point/SIMD registers. + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA + EPILOG_RETURN + +NESTED_END \helper\()Naked, _TEXT +.endm + +GenerateProfileHelper ProfileEnter, PROFILE_ENTER +GenerateProfileHelper ProfileLeave, PROFILE_LEAVE +GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL + +#endif // PROFILING_SUPPORTED + + +#ifdef FEATURE_TIERED_COMPILATION + +NESTED_ENTRY OnCallCountThresholdReachedStub, _TEXT, NoHandler + PROLOG_WITH_TRANSITION_BLOCK + + addi a0, sp, __PWTB_TransitionBlock // TransitionBlock * + addi a1, t3, 0 // stub-identifying token + call C_FUNC(OnCallCountThresholdReached) + addi t4, a0, 0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG t4 +NESTED_END OnCallCountThresholdReachedStub, _TEXT + +#endif // FEATURE_TIERED_COMPILATION diff --git a/src/coreclr/vm/riscv64/calldescrworkerloongarch64.S b/src/coreclr/vm/riscv64/calldescrworkerloongarch64.S deleted file mode 100644 index a7cd5b6c4d2403..00000000000000 --- a/src/coreclr/vm/riscv64/calldescrworkerloongarch64.S +++ /dev/null @@ -1,7 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "unixasmmacros.inc" -#include "asmconstants.h" - -#error "TODO-RISCV64: missing implementation" diff --git a/src/coreclr/vm/riscv64/calldescrworkerriscv64.S b/src/coreclr/vm/riscv64/calldescrworkerriscv64.S new file mode 100644 index 00000000000000..38f248fd632d4f --- /dev/null +++ b/src/coreclr/vm/riscv64/calldescrworkerriscv64.S @@ -0,0 +1,205 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "unixasmmacros.inc" +#include "asmconstants.h" + +//----------------------------------------------------------------------------- +// This helper routine enregisters the appropriate arguments and makes the +// actual call. +//----------------------------------------------------------------------------- +//void CallDescrWorkerInternal(CallDescrData * pCallDescrData); + +NESTED_ENTRY CallDescrWorkerInternal, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x20 + PROLOG_SAVE_REG s1, 16 + + lwu a1, CallDescrData__numStackSlots(a0) + + addi s1, a0, 0 // save pCallDescrData in s1 + beq a1, zero, LOCAL_LABEL(donestack) + + slli a2, a1, 3 + andi a0, a2, 0x8 + sub t4, sp, a0 // padding on high-addr + add a0, a0, a2 + sub sp, sp, a0 // stack-16byte aligned + + ld a0, CallDescrData__pSrc(s1) + + add a2, a0, a2 // pSrcEnd=pSrc+8*numStackSlots + + // This loop copies numStackSlots words + // from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...] 
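+    // The outgoing area was rounded up to a 16-byte multiple above: when numStackSlots is odd the
+    // extra 8-byte pad sits at the highest addresses (t4 started one slot below the old sp), so sp
+    // stays 16-byte aligned while the slots themselves are copied top-down by the loop below.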
+LOCAL_LABEL(stackloop): + addi a2, a2, -8 + ld a4, 0(a2) + addi t4, t4, -8 + sd a4, 0(t4) + addi a1, a1, -1 + bne a1, zero, LOCAL_LABEL(stackloop) + +LOCAL_LABEL(donestack): + // If FP arguments are supplied in registers (t4 != NULL) + ld t4, CallDescrData__pFloatArgumentRegisters(s1) + beq t4, zero, LOCAL_LABEL(NoFloatingPoint) + + fld fa0, 0(t4) + fld fa1, 8(t4) + fld fa2, 16(t4) + fld fa3, 24(t4) + fld fa4, 32(t4) + fld fa5, 40(t4) + fld fa6, 48(t4) + fld fa7, 56(t4) + +LOCAL_LABEL(NoFloatingPoint): + // Copy [pArgumentRegisters, ..., pArgumentRegisters + 56] + // into a0, ..., a7 + ld t4, CallDescrData__pArgumentRegisters(s1) + ld a0, 0(t4) + ld a1, 8(t4) + ld a2, 16(t4) + ld a3, 24(t4) + ld a4, 32(t4) + ld a5, 40(t4) + ld a6, 48(t4) + ld a7, 56(t4) + + ld t4, CallDescrData__pTarget(s1) + + // call pTarget + jalr ra, 0(t4) + + lw a3, CallDescrData__fpReturnSize(s1) + + // Int return case + beq a3, zero, LOCAL_LABEL(IntReturn) + + // Struct with Float/Double field return case. + ori t4, zero, CallDescrData__flagOneFloat + beq t4, a3, LOCAL_LABEL(FloatReturn) + + ori t4, zero, CallDescrData__flagOneDouble + beq t4, a3, LOCAL_LABEL(DoubleReturn) + + ori t4, zero, CallDescrData__flagFloatInt + beq t4, a3, LOCAL_LABEL(FloatIntReturn) + + ori t4, zero, CallDescrData__flagDoubleInt + beq t4, a3, LOCAL_LABEL(DoubleIntReturn) + + ori t4, zero, CallDescrData__flagFloatLong + beq t4, a3, LOCAL_LABEL(FloatLongReturn) + + ori t4, zero, CallDescrData__flagDoubleLong + beq t4, a3, LOCAL_LABEL(DoubleLongReturn) + + ori t4, zero, CallDescrData__flagIntFloat + beq t4, a3, LOCAL_LABEL(IntFloatReturn) + + ori t4, zero, CallDescrData__flagLongFloat + beq t4, a3, LOCAL_LABEL(LongFloatReturn) + + ori t4, zero, CallDescrData__flagIntDouble + beq t4, a3, LOCAL_LABEL(IntDoubleReturn) + + ori t4, zero, CallDescrData__flagLongDouble + beq t4, a3, LOCAL_LABEL(LongDoubleReturn) + + ori t4, zero, CallDescrData__flagFloatFloat + beq t4, a3, LOCAL_LABEL(FloatFloatReturn) + + ori t4, zero, CallDescrData__flagDoubleFloat + beq t4, a3, LOCAL_LABEL(DoubleFloatReturn) + + ori t4, zero, CallDescrData__flagFloatDouble + beq t4, a3, LOCAL_LABEL(FloatDoubleReturn) + + ori t4, zero, CallDescrData__flagDoubleDouble + beq t4, a3, LOCAL_LABEL(DoubleDoubleReturn) + +LOCAL_LABEL(NotCorrectReturn): + sw ra, 0(zero) + EMIT_BREAKPOINT // Unreachable + +LOCAL_LABEL(FloatReturn): + fsw f0, CallDescrData__returnValue(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(DoubleReturn): + fsd fa0, CallDescrData__returnValue(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(FloatIntReturn): + fsw fa0, CallDescrData__returnValue(s1) + sw a0, (CallDescrData__returnValue + 4)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(DoubleIntReturn): + fsd fa0, CallDescrData__returnValue(s1) + sw a0, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(FloatLongReturn): + fsw fa0, CallDescrData__returnValue(s1) + sd a0, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(DoubleLongReturn): + fsd fa0, CallDescrData__returnValue(s1) + sd a0, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(IntFloatReturn): + sw a0, CallDescrData__returnValue(s1) + fsw fa0, (CallDescrData__returnValue + 4)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(LongFloatReturn): + sd a0, CallDescrData__returnValue(s1) + fsw fa0, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(IntDoubleReturn): + sw a0, 
CallDescrData__returnValue(s1) + fsd fa0, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(LongDoubleReturn): + sd a0, CallDescrData__returnValue(s1) + fsd fa0, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(FloatFloatReturn): + fsw fa0, CallDescrData__returnValue(s1) + fsw fa1, (CallDescrData__returnValue + 4)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(DoubleFloatReturn): + fsd fa0, CallDescrData__returnValue(s1) + fsw fa1, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(FloatDoubleReturn): + fsw fa0, CallDescrData__returnValue(s1) + fsd fa1, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(DoubleDoubleReturn): + fsd fa0, CallDescrData__returnValue(s1) + fsd fa1, (CallDescrData__returnValue + 8)(s1) + jal x0, LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(IntReturn): + // Save return value into retbuf for int + sd a0, CallDescrData__returnValue(s1) + sd a1, (CallDescrData__returnValue + 8)(s1) + +LOCAL_LABEL(ReturnDone): + + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG s1, 16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x20 + jalr x0, ra, 0 +NESTED_END CallDescrWorkerInternal, _TEXT diff --git a/src/coreclr/vm/riscv64/cgencpu.h b/src/coreclr/vm/riscv64/cgencpu.h index 9a0cdd4e406d37..f085013f0be3a0 100644 --- a/src/coreclr/vm/riscv64/cgencpu.h +++ b/src/coreclr/vm/riscv64/cgencpu.h @@ -1,4 +1,505 @@ -// Licensed to the .NET Foundation under one or more agreements. + // The .NET Foundation licenses this file to you under the MIT license. -#error "TODO-RISCV64: missing implementation" +// #error "TODO-RISCV64: missing implementation" + +#ifndef TARGET_RISCV64 +#error Should only include "cGenCpu.h" for RISCV64 builds +#endif + +#ifndef __cgencpu_h__ +#define __cgencpu_h__ + +#define INSTRFMT_K64 +#include + +#ifndef TARGET_UNIX +#define USE_REDIRECT_FOR_GCSTRESS +#endif // TARGET_UNIX + +EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal); +EXTERN_C void setFPReturn(int fpSize, INT64 retVal); + + +class ComCallMethodDesc; + +extern PCODE GetPreStubEntryPoint(); + +#define COMMETHOD_PREPAD 24 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc) +#ifdef FEATURE_COMINTEROP +#define COMMETHOD_CALL_PRESTUB_SIZE 24 +#define COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET 16 // the offset of the call target address inside the prestub +#endif // FEATURE_COMINTEROP + +#define STACK_ALIGN_SIZE 16 + +#define JUMP_ALLOCATE_SIZE 16 // # bytes to allocate for a jump instruction +#define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 16 // # bytes to allocate for a back to back jump instruction + +#define HAS_NDIRECT_IMPORT_PRECODE 1 + +#define USE_INDIRECT_CODEHEADER + +#define HAS_FIXUP_PRECODE 1 + +// ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer +#define HAS_THISPTR_RETBUF_PRECODE 1 + +#define CODE_SIZE_ALIGN 8 +#define CACHE_LINE_SIZE 64 +#define LOG2SLOT LOG2_PTRSIZE + +#define ENREGISTERED_RETURNTYPE_MAXSIZE 16 // bytes (two FP registers: f10 and f11 +#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 16 // bytes (two int registers: a0 and a1) +#define ENREGISTERED_PARAMTYPE_MAXSIZE 16 // bytes (max value type size that can be passed by value) + +#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter +#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter +// #define CALLDESCR_RETBUFFARGREG 1 // CallDescrWorker has RetBuffArg parameter that's separate from 
arg regs // TODO RISCV64 + +#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes. + +// Given a return address retrieved during stackwalk, +// this is the offset by which it should be decremented to arrive at the callsite. +#define STACKWALK_CONTROLPC_ADJUST_OFFSET 4 + +//********************************************************************** +// Parameter size +//********************************************************************** + +inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa) +{ + const unsigned stackSlotSize = 8; + return ALIGN_UP(parmSize, stackSlotSize); +} + +// +// JIT HELPERS. +// +// Create alias for optimized implementations of helpers provided on this platform +// +#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_SingleAppDomain +#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_SingleAppDomain +#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain +#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain + +//********************************************************************** +// Frames +//********************************************************************** + +//-------------------------------------------------------------------- +// This represents the callee saved (non-volatile) integer registers saved as +// of a FramedMethodFrame. +//-------------------------------------------------------------------- +typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters; +struct CalleeSavedRegisters { + INT64 sp; // stack pointer + INT64 fp; // frame pointer + INT64 s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11; + INT64 tp, gp; +}; + +//-------------------------------------------------------------------- +// This represents the arguments that are stored in volatile integer registers. +// This should not overlap the CalleeSavedRegisters since those are already +// saved separately and it would be wasteful to save the same register twice. +// If we do use a non-volatile register as an argument, then the ArgIterator +// will probably have to communicate this back to the PromoteCallerStack +// routine to avoid a double promotion. +//-------------------------------------------------------------------- +#define NUM_ARGUMENT_REGISTERS 8 +typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters; +struct ArgumentRegisters { + INT64 a[8]; // a0 ....a7 +}; + +#define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters) + + +//-------------------------------------------------------------------- +// This represents the floating point argument registers which are saved +// as part of the NegInfo for a FramedMethodFrame. Note that these +// might not be saved by all stubs: typically only those that call into +// C++ helpers will need to preserve the values in these volatile +// registers. +//-------------------------------------------------------------------- +#define NUM_FLOAT_ARGUMENT_REGISTERS 8 +typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters; +struct FloatArgumentRegisters { + //TODO: not supports RISCV64-SIMD. 
+ double f[8]; // f0-f7 +}; + + +//********************************************************************** +// Exception handling +//********************************************************************** + +inline PCODE GetIP(const T_CONTEXT * context) { + LIMITED_METHOD_DAC_CONTRACT; + return context->Pc; +} + +inline void SetIP(T_CONTEXT *context, PCODE ip) { + LIMITED_METHOD_DAC_CONTRACT; + context->Pc = ip; +} + +inline TADDR GetSP(const T_CONTEXT * context) { + LIMITED_METHOD_DAC_CONTRACT; + return TADDR(context->Sp); +} + +inline TADDR GetRA(const T_CONTEXT * context) { + LIMITED_METHOD_DAC_CONTRACT; + return context->Ra; +} + +inline void SetRA( T_CONTEXT * context, TADDR ip) { + LIMITED_METHOD_DAC_CONTRACT; + context->Ra = ip; +} + +inline TADDR GetReg(T_CONTEXT * context, int Regnum) +{ + LIMITED_METHOD_DAC_CONTRACT; + _ASSERTE(Regnum >= 0 && Regnum < 32 ); + return (TADDR)(&context->R0 + Regnum); +} + +inline void SetReg(T_CONTEXT * context, int Regnum, PCODE RegContent) +{ + LIMITED_METHOD_DAC_CONTRACT; + _ASSERTE(Regnum >= 0 && Regnum <=28 ); + *(&context->R0 + Regnum) = RegContent; +} + +extern "C" LPVOID __stdcall GetCurrentSP(); + +inline void SetSP(T_CONTEXT *context, TADDR sp) { + LIMITED_METHOD_DAC_CONTRACT; + context->Sp = DWORD64(sp); +} + +inline void SetFP(T_CONTEXT *context, TADDR fp) { + LIMITED_METHOD_DAC_CONTRACT; + context->Fp = DWORD64(fp); +} + +inline TADDR GetFP(const T_CONTEXT * context) +{ + LIMITED_METHOD_DAC_CONTRACT; + return (TADDR)(context->Fp); +} + + +inline TADDR GetMem(PCODE address, SIZE_T size, bool signExtend) +{ + TADDR mem; + LIMITED_METHOD_DAC_CONTRACT; + EX_TRY + { + switch (size) + { + case 4: + if (signExtend) + mem = *(int32_t*)address; + else + mem = *(uint32_t*)address; + break; + case 8: + mem = *(uint64_t*)address; + break; + default: + UNREACHABLE(); + } + } + EX_CATCH + { + mem = NULL; + _ASSERTE(!"Memory read within jitted Code Failed, this should not happen!!!!"); + } + EX_END_CATCH(SwallowAllExceptions); + return mem; +} + +#ifdef FEATURE_COMINTEROP +void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target); +#endif // FEATURE_COMINTEROP + +inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool hasCodeExecutedBefore = false) +{ + return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode); +} + +//------------------------------------------------------------------------ +inline void emitJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) +{ + LIMITED_METHOD_CONTRACT; + UINT32* pCode = (UINT32*)pBufferRW; + + // We require 8-byte alignment so the LD instruction is aligned properly + _ASSERTE(((UINT_PTR)pCode & 7) == 0); + + // auipc ra, 0 + // ld ra, ra, 16 + // jalr r0, ra, 0 + // nop //padding. + + pCode[0] = 0x00000097; // auipc ra, 0 + pCode[1] = 0x0100b083; // ld ra, 16(ra) + pCode[2] = 0x00008067; // jalr x0, ra, 0 + pCode[3] = 0x00000013; // padding nop. Also used for isJump. 
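+    // Resulting stub layout (24 bytes): four 4-byte instructions followed by the 8-byte target,
+    // which the "ld ra, 16(ra)" picks up PC-relative to the auipc:
+    //   +0   auipc ra, 0
+    //   +4   ld    ra, 16(ra)
+    //   +8   jalr  x0, ra, 0
+    //   +12  nop
+    //   +16  <64-bit target address>   (written via pCode + 4 below)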
+ + // Ensure that the updated instructions get updated in the I-Cache + ClrFlushInstructionCache(pBufferRX, 16); + + *((LPVOID *)(pCode + 4)) = target; // 64-bit target address +} + +//------------------------------------------------------------------------ +// Given the same pBuffer that was used by emitJump this method +// decodes the instructions and returns the jump target +inline PCODE decodeJump(PCODE pCode) +{ + LIMITED_METHOD_CONTRACT; + + TADDR pInstr = PCODEToPINSTR(pCode); + + return *dac_cast(pInstr + 2*sizeof(DWORD)); +} + +//------------------------------------------------------------------------ +inline BOOL isJump(PCODE pCode) +{ + LIMITED_METHOD_DAC_CONTRACT; + + TADDR pInstr = PCODEToPINSTR(pCode); + + return *dac_cast(pInstr) == 0x58000050; +} + +//------------------------------------------------------------------------ +inline BOOL isBackToBackJump(PCODE pBuffer) +{ + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + return isJump(pBuffer); +} + +//------------------------------------------------------------------------ +inline void emitBackToBackJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) +{ + WRAPPER_NO_CONTRACT; + emitJump(pBufferRX, pBufferRW, target); +} + +//------------------------------------------------------------------------ +inline PCODE decodeBackToBackJump(PCODE pBuffer) +{ + WRAPPER_NO_CONTRACT; + return decodeJump(pBuffer); +} + +//---------------------------------------------------------------------- + +struct IntReg +{ + int reg; + IntReg(int reg):reg(reg) + { + _ASSERTE(0 <= reg && reg < 32); + } + + operator int () { return reg; } + operator int () const { return reg; } + int operator == (IntReg other) { return reg == other.reg; } + int operator != (IntReg other) { return reg != other.reg; } + WORD Mask() const { return 1 << reg; } +}; + +struct FloatReg +{ + int reg; + FloatReg(int reg):reg(reg) + { + _ASSERTE(0 <= reg && reg < 32); + } + + operator int () { return reg; } + operator int () const { return reg; } + int operator == (FloatReg other) { return reg == other.reg; } + int operator != (FloatReg other) { return reg != other.reg; } + WORD Mask() const { return 1 << reg; } +}; + +struct CondCode +{ + int cond; + CondCode(int cond):cond(cond) + { + _ASSERTE(0 <= cond && cond < 16); + } +}; + +const IntReg RegSp = IntReg(2); +const IntReg RegFp = IntReg(8); +const IntReg RegRa = IntReg(1); + +#define GetEEFuncEntryPoint(pfn) GFN_TADDR(pfn) + +#if 1 // TODO RISCV64 +class StubLinkerCPU : public StubLinker +{ + +public: + + // BitFlags for EmitLoadStoreReg(Pair)Imm methods + enum { + eSTORE = 0x0, + eLOAD = 0x1, + }; + + static void Init(); + static bool isValidSimm12(int value) { + return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); + } + + void EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall); + void EmitCallLabel(CodeLabel *target, BOOL fTailCall, BOOL fIndirect); + + void EmitShuffleThunk(struct ShuffleEntry *pShuffleEntryArray); + +#if defined(FEATURE_SHARE_GENERIC_CODE) + void EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg); +#endif // FEATURE_SHARE_GENERIC_CODE + +#ifdef _DEBUG + void EmitNop() { _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); } +#endif + void EmitBreakPoint() { _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); } + void EmitMovConstant(IntReg target, UINT64 constant); + void EmitCmpImm(IntReg reg, int imm); + void EmitCmpReg(IntReg Xn, IntReg Xm); + void EmitCondFlagJump(CodeLabel * target, UINT cond); + void 
EmitJumpRegister(IntReg regTarget); + void EmitMovReg(IntReg dest, IntReg source); + + void EmitSubImm(IntReg Xd, IntReg Xn, unsigned int value); + void EmitAddImm(IntReg Xd, IntReg Xn, unsigned int value); + + void EmitLoadStoreRegPairImm(DWORD flags, IntReg Xt1, IntReg Xt2, IntReg Xn, int offset=0); + void EmitLoadStoreRegPairImm(DWORD flags, FloatReg Ft1, FloatReg Ft2, IntReg Xn, int offset=0); + + void EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset=0); + void EmitLoadStoreRegImm(DWORD flags, FloatReg Ft, IntReg Xn, int offset=0); + + void EmitLoadFloatRegImm(FloatReg ft, IntReg base, int offset); + + void EmitCallRegister(IntReg reg); + void EmitRet(IntReg reg); +}; +#endif + +extern "C" void SinglecastDelegateInvokeStub(); + + +// preferred alignment for data +#define DATA_ALIGNMENT 8 + +// TODO RISCV64 +struct DECLSPEC_ALIGN(16) UMEntryThunkCode +{ + DWORD m_code[4]; + + TADDR m_pTargetCode; + TADDR m_pvSecretParam; + + void Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam); + void Poison(); + + LPCBYTE GetEntryPoint() const + { + LIMITED_METHOD_CONTRACT; + + return (LPCBYTE)this; + } + + static int GetEntryPointOffset() + { + LIMITED_METHOD_CONTRACT; + + return 0; + } +}; + +struct HijackArgs +{ + union + { + struct { + DWORD64 A0; + DWORD64 A1; + }; + size_t ReturnValue[2]; + }; + union + { + struct { + DWORD64 FA0; + DWORD64 FA1; + }; + size_t FPReturnValue[2]; + }; + DWORD64 Fp; // frame pointer + DWORD64 Gp, Tp, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11; + union + { + DWORD64 Ra; + size_t ReturnAddress; + }; + }; + +// Precode to shuffle this and retbuf for closed delegates over static methods with return buffer +struct ThisPtrRetBufPrecode { + + static const int Type = 0x2; + + UINT32 m_rgCode[6]; + TADDR m_pTarget; + TADDR m_pMethodDesc; + + void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + + TADDR GetMethodDesc() + { + LIMITED_METHOD_DAC_CONTRACT; + + return m_pMethodDesc; + } + + PCODE GetTarget() + { + LIMITED_METHOD_DAC_CONTRACT; + return m_pTarget; + } + +#ifndef DACCESS_COMPILE + BOOL SetTargetInterlocked(TADDR target, TADDR expected) + { + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + } + CONTRACTL_END; + + ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); + return (TADDR)InterlockedCompareExchange64( + (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; + } +#endif // !DACCESS_COMPILE +}; +typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; + +#endif // __cgencpu_h__ diff --git a/src/coreclr/vm/riscv64/crthelpers.S b/src/coreclr/vm/riscv64/crthelpers.S index f6c1fb2c96ce46..d4152a96c77015 100644 --- a/src/coreclr/vm/riscv64/crthelpers.S +++ b/src/coreclr/vm/riscv64/crthelpers.S @@ -3,4 +3,34 @@ #include "unixasmmacros.inc" -#error "TODO-RISCV64: missing implementation" +// JIT_MemSet/JIT_MemCpy +// +// It is IMPORTANT that the exception handling code is able to find these guys +// on the stack, but on non-windows platforms we can just defer to the platform +// implementation. +// +LEAF_ENTRY JIT_MemSet, _TEXT + beq a2, zero, LOCAL_LABEL(JIT_MemSet_ret) + + lb zero, 0(a0) // Is this really needed ? + + tail memset + +LOCAL_LABEL(JIT_MemSet_ret): + jalr x0, 0(ra) +LEAF_END_MARKED JIT_MemSet, _TEXT + +////NOTE: Here must use LEAF_END_MARKED! not LEAF_END !!! +LEAF_ENTRY JIT_MemCpy, _TEXT + beq a2, zero, LOCAL_LABEL(JIT_MemCpy_ret) + + lb zero, 0(a0) + lb zero, 0(a1) // Is this really needed ? 
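+    // The dummy loads above touch the destination and source first, presumably so that a null or
+    // invalid pointer faults inside the marked JIT_MemCpy region (where the EH code expects to
+    // find it) rather than deep inside the platform memcpy that is tail-called next.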
+ + tail memcpy + +LOCAL_LABEL(JIT_MemCpy_ret): + jalr x0, 0(ra) + +////NOTE: Here must use LEAF_END_MARKED! not LEAF_END !!! +LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/riscv64/excepcpu.h b/src/coreclr/vm/riscv64/excepcpu.h index 4800154434a53a..eb575235af8feb 100644 --- a/src/coreclr/vm/riscv64/excepcpu.h +++ b/src/coreclr/vm/riscv64/excepcpu.h @@ -1,7 +1,50 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// +// + #ifndef __excepcpu_h__ #define __excepcpu_h__ -#error "TODO-RISCV64: missing implementation" + +#define THROW_CONTROL_FOR_THREAD_FUNCTION RedirectForThreadAbort +EXTERN_C void RedirectForThreadAbort(); + + +#define STATUS_CLR_GCCOVER_CODE STATUS_ILLEGAL_INSTRUCTION + +class Thread; +class FaultingExceptionFrame; + +#define INSTALL_EXCEPTION_HANDLING_RECORD(record) +#define UNINSTALL_EXCEPTION_HANDLING_RECORD(record) +// +// On ARM, the COMPlusFrameHandler's work is done by our personality routine. +// +#define DECLARE_CPFH_EH_RECORD(pCurThread) + +// +// Retrieves the redirected CONTEXT* from the stack frame of one of the +// RedirectedHandledJITCaseForXXX_Stub's. +// +PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_DISPATCHER_CONTEXT * pDispatcherContext); +PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_CONTEXT * pContext); + +// +// Retrieves the FaultingExceptionFrame* from the stack frame of +// RedirectForThrowControl. +// +FaultingExceptionFrame *GetFrameFromRedirectedStubStackFrame (T_DISPATCHER_CONTEXT *pDispatcherContext); + +inline +PCODE GetAdjustedCallAddress(PCODE returnAddress) +{ + LIMITED_METHOD_CONTRACT; + + return returnAddress - 4; +} + +BOOL AdjustContextForVirtualStub(EXCEPTION_RECORD *pExceptionRecord, T_CONTEXT *pContext); + +#endif // __excepcpu_h__ diff --git a/src/coreclr/vm/riscv64/gmscpu.h b/src/coreclr/vm/riscv64/gmscpu.h index 9a0cdd4e406d37..6506b10b8f751c 100644 --- a/src/coreclr/vm/riscv64/gmscpu.h +++ b/src/coreclr/vm/riscv64/gmscpu.h @@ -1,4 +1,102 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#error "TODO-RISCV64: missing implementation" +/**************************************************************/ +/* gmscpu.h */ +/**************************************************************/ +/* HelperFrame is defines 'GET_STATE(machState)' macro, which + figures out what the state of the machine will be when the + current method returns. It then stores the state in the + JIT_machState structure. 
*/ + +/**************************************************************/ + +#ifndef __gmscpu_h__ +#define __gmscpu_h__ + +#define __gmscpu_h__ + +// CalleeSaveRegisters +#define NUM_NONVOLATILE_CONTEXT_POINTERS 14 + +struct MachState { + ULONG64 captureCalleeSavedRegisters[NUM_NONVOLATILE_CONTEXT_POINTERS]; // preserved registers + PTR_ULONG64 ptrCalleeSavedRegisters[NUM_NONVOLATILE_CONTEXT_POINTERS]; // pointers to preserved registers + TADDR _pc; // program counter after the function returns + TADDR _sp; // stack pointer after the function returns + BOOL _isValid; + + BOOL isValid() { LIMITED_METHOD_DAC_CONTRACT; return _isValid; } + TADDR GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; return _pc; } +}; + +struct LazyMachState : public MachState{ + + TADDR captureSp; // Stack pointer at the time of capture + TADDR captureIp; // Instruction pointer at the time of capture + + void setLazyStateFromUnwind(MachState* copy); + static void unwindLazyState(LazyMachState* baseState, + MachState* lazyState, + DWORD threadId, + int funCallDepth = 1, + HostCallPreference hostCallPreference = AllowHostCalls); +}; + +inline void LazyMachState::setLazyStateFromUnwind(MachState* copy) +{ +#if defined(DACCESS_COMPILE) + // This function cannot be called in DAC because DAC cannot update target memory. + DacError(E_FAIL); + return; + +#else // !DACCESS_COMPILE + + _sp = copy->_sp; + _pc = copy->_pc; + + // Capture* has already been set, so there is no need to touch it + + // loop over the nonvolatile context pointers and make + // sure to properly copy interior pointers into the + // new struct + + PULONG64* pSrc = (PULONG64 *)©->ptrCalleeSavedRegisters; + PULONG64* pDst = (PULONG64 *)&this->ptrCalleeSavedRegisters; + + const PULONG64 LowerBoundDst = (PULONG64) this; + const PULONG64 LowerBoundSrc = (PULONG64) copy; + + const PULONG64 UpperBoundSrc = (PULONG64) ((BYTE*)LowerBoundSrc + sizeof(*copy)); + + for (int i = 0; i < NUM_NONVOLATILE_CONTEXT_POINTERS; i++) + { + PULONG64 valueSrc = *pSrc++; + + if ((LowerBoundSrc <= valueSrc) && (valueSrc < UpperBoundSrc)) + { + // make any pointer interior to 'src' interior to 'dst' + valueSrc = (PULONG64)((BYTE*)valueSrc - (BYTE*)LowerBoundSrc + (BYTE*)LowerBoundDst); + } + + *pDst++ = valueSrc; + captureCalleeSavedRegisters[i] = copy->captureCalleeSavedRegisters[i]; + } + + + // this has to be last because we depend on write ordering to + // synchronize the race implicit in updating this struct + VolatileStore(&_isValid, TRUE); +#endif // DACCESS_COMPILE +} + +// Do the initial capture of the machine state. This is meant to be +// as light weight as possible, as we may never need the state that +// we capture. +EXTERN_C void LazyMachStateCaptureState(struct LazyMachState *pState); + +#define CAPTURE_STATE(machState, ret) \ + LazyMachStateCaptureState(machState) + + +#endif diff --git a/src/coreclr/vm/riscv64/pinvokestubs.S b/src/coreclr/vm/riscv64/pinvokestubs.S index 3515f38c8120d7..ea5d245c56a0d6 100644 --- a/src/coreclr/vm/riscv64/pinvokestubs.S +++ b/src/coreclr/vm/riscv64/pinvokestubs.S @@ -4,4 +4,185 @@ #include "asmconstants.h" #include "unixasmmacros.inc" -#error "TODO-RISCV64: missing implementation" +// ------------------------------------------------------------------ +// Macro to generate PInvoke Stubs. +// __PInvokeStubFuncName : function which calls the actual stub obtained from VASigCookie +// __PInvokeGenStubFuncName : function which generates the IL stubs for PInvoke +// +// Params :- +// FuncPrefix : prefix of the function name for the stub +// Eg. 
VarargPinvoke, GenericPInvokeCalli +// VASigCookieReg : register which contains the VASigCookie +// SaveFPArgs : "Yes" or "No" . For varidic functions FP Args are not present in FP regs +// So need not save FP Args registers for vararg Pinvoke +.macro PINVOKE_STUB __PInvokeStubFuncName,__PInvokeGenStubFuncName,__PInvokeStubWorkerName,VASigCookieReg,HiddenArg,SaveFPArgs,ShiftLeftAndOrSecret=0 + + + NESTED_ENTRY \__PInvokeStubFuncName, _TEXT, NoHandler + + // get the stub + ld t0, (VASigCookie__pNDirectILStub)(\VASigCookieReg) + + // if null goto stub generation + beq t0, zero, \__PInvokeGenStubFuncName + + .if (\ShiftLeftAndOrSecret == 1) + // + // We need to distinguish between a MethodDesc* and an unmanaged target. + // The way we do this is to shift the managed target to the left by one bit and then set the + // least significant bit to 1. This works because MethodDesc* are always 8-byte aligned. + // + slli \HiddenArg, \HiddenArg, 1 + ori \HiddenArg, \HiddenArg, 1 + .endif + + jalr x0, t0, 0 + NESTED_END \__PInvokeStubFuncName, _TEXT + + NESTED_ENTRY \__PInvokeGenStubFuncName, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK 0, 0, \SaveFPArgs + + // a2 = Umanaged Target\MethodDesc + addi a2, \HiddenArg, 0 + + // a1 = VaSigCookie + addi a1, \VASigCookieReg, 0 + + // a0 = pTransitionBlock + addi a0, sp, __PWTB_TransitionBlock + + // save hidden arg + addi s1, \HiddenArg, 0 + + // save VASigCookieReg + addi s2, \VASigCookieReg, 0 + + call C_FUNC(\__PInvokeStubWorkerName) + + // restore VASigCookieReg + addi \VASigCookieReg, s2, 0 + + // restore hidden arg (method desc or unmanaged target) + addi \HiddenArg, s1, 0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + + EPILOG_BRANCH C_FUNC(\__PInvokeStubFuncName) + NESTED_END \__PInvokeGenStubFuncName, _TEXT +.endm + +// ------------------------------------------------------------------ +// IN: +// InlinedCallFrame (x0) = pointer to the InlinedCallFrame data, including the GS cookie slot (GS cookie right +// before actual InlinedCallFrame data) +// +// + NESTED_ENTRY JIT_PInvokeBegin, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 32 + PROLOG_SAVE_REG s1, 16 // the stack slot at sp+24 is empty for 16 byte alignment + + PREPARE_EXTERNAL_VAR s_gsCookie, t0 + ld t4, 0(t0) + sd t4, 0(a0) + addi s1, a0, SIZEOF__GSCookie + + // s1 = pFrame + // set first slot to the value of InlinedCallFrame::`vftable' (checked by runtime code) + PREPARE_EXTERNAL_VAR _ZTV16InlinedCallFrame, t0 + addi t4, t0, 16 + sd t4, 0(s1) + + sd zero, (InlinedCallFrame__m_Datum)(s1) + + addi t0, sp, 32 + sd t0, (InlinedCallFrame__m_pCallSiteSP)(s1) + sd ra, (InlinedCallFrame__m_pCallerReturnAddress)(s1) + + ld t4, 0(sp) + sd t4, (InlinedCallFrame__m_pCalleeSavedFP)(s1) + + // v0 = GetThread() + call GetThreadHelper + + sd a0, (InlinedCallFrame__m_pThread)(s1) + + // pFrame->m_Next = pThread->m_pFrame; + ld t4, Thread_m_pFrame(a0) + sd t4, Frame__m_Next(s1) + + // pThread->m_pFrame = pFrame; + sd s1, (Thread_m_pFrame)(a0) + + // pThread->m_fPreemptiveGCDisabled = 0 + sw zero, (Thread_m_fPreemptiveGCDisabled)(a0) + + EPILOG_RESTORE_REG s1, 16 //the stack slot at sp+24 is empty for 16 byte alignment + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RETURN + + NESTED_END JIT_PInvokeBegin, _TEXT + +// ------------------------------------------------------------------ +// IN: +// InlinedCallFrame (x0) = pointer to the InlinedCallFrame data, including the GS cookie slot (GS cookie right +// before actual InlinedCallFrame data) +// +// + LEAF_ENTRY JIT_PInvokeEnd, _TEXT + + addi 
a0, a0, SIZEOF__GSCookie + ld a1, (InlinedCallFrame__m_pThread)(a0) + // a0 = pFrame + // a1 = pThread + + // pThread->m_fPreemptiveGCDisabled = 1 + ori t4, x0, 1 + sw t4, (Thread_m_fPreemptiveGCDisabled)(a1) + + // Check return trap + PREPARE_EXTERNAL_VAR g_TrapReturningThreads, t0 + lw t4, 0(t0) + bne t4, zero, LOCAL_LABEL(RarePath) + + // pThread->m_pFrame = pFrame->m_Next + ld t4, (Frame__m_Next)(a0) + sd t4, (Thread_m_pFrame)(a1) + + jalr x0, ra, 0 + +LOCAL_LABEL(RarePath): + tail JIT_PInvokeEndRarePath + +LEAF_END JIT_PInvokeEnd, _TEXT + +// ------------------------------------------------------------------ +// VarargPInvokeStub & VarargPInvokeGenILStub +// There is a separate stub when the method has a hidden return buffer arg. +// +// in: +// a0 = VASigCookie* +// t2 = MethodDesc * +// +PINVOKE_STUB VarargPInvokeStub, VarargPInvokeGenILStub, VarargPInvokeStubWorker, a0, t2, 0 + + +// ------------------------------------------------------------------ +// GenericPInvokeCalliHelper & GenericPInvokeCalliGenILStub +// Helper for generic pinvoke calli instruction +// +// in: +// t3 = VASigCookie* +// t2 = Unmanaged target +// +PINVOKE_STUB GenericPInvokeCalliHelper, GenericPInvokeCalliGenILStub, GenericPInvokeCalliStubWorker, t3, t2, 1, 1 + +//// ------------------------------------------------------------------ +//// VarargPInvokeStub_RetBuffArg & VarargPInvokeGenILStub_RetBuffArg +//// Vararg PInvoke Stub when the method has a hidden return buffer arg +//// +//// in: +//// a1 = VASigCookie* //not used ??? +//// t2 = MethodDesc* +//// +//PINVOKE_STUB VarargPInvokeStub_RetBuffArg, VarargPInvokeGenILStub_RetBuffArg, VarargPInvokeStubWorker, a1, t8, 0 diff --git a/src/coreclr/vm/riscv64/profiler.cpp b/src/coreclr/vm/riscv64/profiler.cpp index 8d7dc92a1a59e7..e8e0d11ca159c2 100644 --- a/src/coreclr/vm/riscv64/profiler.cpp +++ b/src/coreclr/vm/riscv64/profiler.cpp @@ -3,4 +3,310 @@ #include "common.h" -#error "TODO-RISCV64: missing implementation" +// #error "TODO-RISCV64: missing implementation" +#ifdef PROFILING_SUPPORTED +#include "proftoeeinterfaceimpl.h" + +#define PROFILE_ENTER 1 +#define PROFILE_LEAVE 2 +#define PROFILE_TAILCALL 4 + +// Scratch space to store HFA return values (max 16 bytes) +#define PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE 16 + +typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA +{ + void* Fp; + void* Pc; + void* x8; + ArgumentRegisters argumentRegisters; + FunctionID functionId; + FloatArgumentRegisters floatArgumentRegisters; + void* probeSp; + void* profiledSp; + void* hiddenArg; + UINT32 flags; + UINT32 unused; + BYTE buffer[PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE]; +} PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA; + +UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void* pPlatformSpecificHandle) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + LIMITED_METHOD_CONTRACT; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(pPlatformSpecificHandle); + return (UINT_PTR)pData->Pc; +} + +void ProfileSetFunctionIDInPlatformSpecificHandle(void* pPlatformSpecificHandle, FunctionID functionId) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pPlatformSpecificHandle != nullptr); + _ASSERTE(functionId != 0); + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(pPlatformSpecificHandle); + pData->functionId = functionId; +} + +ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHandle) + : m_argIterator(pSig) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + WRAPPER_NO_CONTRACT; + + _ASSERTE(pSig != nullptr); + 
_ASSERTE(pPlatformSpecificHandle != nullptr); + + m_handle = pPlatformSpecificHandle; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(pPlatformSpecificHandle); +#ifdef _DEBUG + // Unwind a frame and get the SP for the profiled method to make sure it matches + // what the JIT gave us + + // Setup the context to represent the frame that called ProfileEnterNaked + CONTEXT ctx; + memset(&ctx, 0, sizeof(CONTEXT)); + + ctx.Sp = (DWORD64)pData->probeSp; + ctx.Fp = (DWORD64)pData->Fp; + ctx.Pc = (DWORD64)pData->Pc; + + // Walk up a frame to the caller frame (called the managed method which called ProfileEnterNaked) + Thread::VirtualUnwindCallFrame(&ctx); + + _ASSERTE(pData->profiledSp == (void*)ctx.Sp); +#endif + + // Get the hidden arg if there is one + MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId); + + if ((pData->hiddenArg == nullptr) && (pMD->RequiresInstArg() || pMD->AcquiresInstMethodTableFromThis())) + { + if ((pData->flags & PROFILE_ENTER) != 0) + { + if (pMD->AcquiresInstMethodTableFromThis()) + { + pData->hiddenArg = GetThis(); + } + else + { + // On ARM64 the generic instantiation parameter comes after the optional "this" pointer. + if (m_argIterator.HasThis()) + { + pData->hiddenArg = (void*)pData->argumentRegisters.a[1]; + } + else + { + pData->hiddenArg = (void*)pData->argumentRegisters.a[0]; + } + } + } + else + { + EECodeInfo codeInfo((PCODE)pData->Pc); + + // We want to pass the caller SP here. + pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + } + } +} + +ProfileArgIterator::~ProfileArgIterator() +{ + _ASSERTE(!"TODO RISCV64 NYI"); + LIMITED_METHOD_CONTRACT; + + m_handle = nullptr; +} + +LPVOID ProfileArgIterator::GetNextArgAddr() +{ + _ASSERTE(!"TODO RISCV64 NYI"); + WRAPPER_NO_CONTRACT; + + _ASSERTE(m_handle != nullptr); + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(m_handle); + + if ((pData->flags & (PROFILE_LEAVE | PROFILE_TAILCALL)) != 0) + { + _ASSERTE(!"GetNextArgAddr() - arguments are not available in leave and tailcall probes"); + return nullptr; + } + + int argOffset = m_argIterator.GetNextOffset(); + + if (argOffset == TransitionBlock::InvalidOffset) + { + return nullptr; + } + + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) + { + return (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()); + } + + LPVOID pArg = nullptr; + + if (TransitionBlock::IsArgumentRegisterOffset(argOffset)) + { + pArg = (LPBYTE)&pData->argumentRegisters + (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters()); + } + else + { + _ASSERTE(TransitionBlock::IsStackArgumentOffset(argOffset)); + + pArg = (LPBYTE)pData->profiledSp + (argOffset - TransitionBlock::GetOffsetOfArgs()); + } + + if (m_argIterator.IsArgPassedByRef()) + { + pArg = *(LPVOID*)pArg; + } + + return pArg; +} + +LPVOID ProfileArgIterator::GetHiddenArgValue(void) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + LIMITED_METHOD_CONTRACT; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(m_handle); + + return pData->hiddenArg; +} + +LPVOID ProfileArgIterator::GetThis(void) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle; + MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId); + + // We guarantee to return the correct "this" pointer in the enter probe. 
+ // For the leave and tailcall probes, we only return a valid "this" pointer if it is the generics token. + if (pData->hiddenArg != nullptr) + { + if (pMD->AcquiresInstMethodTableFromThis()) + { + return pData->hiddenArg; + } + } + + if ((pData->flags & PROFILE_ENTER) != 0) + { + if (m_argIterator.HasThis()) + { + return (LPVOID)pData->argumentRegisters.a[0]; + } + } + + return nullptr; +} + +LPVOID ProfileArgIterator::GetReturnBufferAddr(void) +{ + _ASSERTE(!"TODO RISCV64 NYI"); + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(m_handle); + + if ((pData->flags & PROFILE_TAILCALL) != 0) + { + _ASSERTE(!"GetReturnBufferAddr() - return buffer address is not available in tailcall probe"); + return nullptr; + } + + if (m_argIterator.HasRetBuffArg()) + { + if ((pData->flags & PROFILE_ENTER) != 0) + { + return (LPVOID)pData->x8; + } + else + { + // On ARM64 there is no requirement for the method to preserve the value stored in x8. + // In order to workaround this JIT will explicitly return the return buffer address in x0. + _ASSERTE((pData->flags & PROFILE_LEAVE) != 0); + return (LPVOID)pData->argumentRegisters.a[0]; + } + } + + UINT fpReturnSize = m_argIterator.GetFPReturnSize(); + if (fpReturnSize != 0) + { + TypeHandle thReturnValueType; + m_argIterator.GetSig()->GetReturnTypeNormalized(&thReturnValueType); + if (!thReturnValueType.IsNull() && thReturnValueType.IsHFA()) + { + UINT hfaFieldSize = fpReturnSize / 4; + UINT totalSize = m_argIterator.GetSig()->GetReturnTypeSize(); + _ASSERTE(totalSize % hfaFieldSize == 0); + _ASSERTE(totalSize <= 16); + + BYTE *dest = pData->buffer; + for (UINT floatRegIdx = 0; floatRegIdx < totalSize / hfaFieldSize; ++floatRegIdx) + { + if (hfaFieldSize == 4) + { + *(UINT32*)dest = *(UINT32*)&pData->floatArgumentRegisters.f[floatRegIdx]; + dest += 4; + } + else if (hfaFieldSize == 8) + { + *(UINT64*)dest = *(UINT64*)&pData->floatArgumentRegisters.f[floatRegIdx]; + dest += 8; + } + else + { + _ASSERTE(!"unimplemented on RISCV64 yet!"); +#if 0 + _ASSERTE(hfaFieldSize == 16); + *(NEON128*)dest = pData->floatArgumentRegisters.f[floatRegIdx]; + dest += 16; +#endif + } + + if (floatRegIdx > 8) + { + // There's only space for 8 arguments in buffer + _ASSERTE(FALSE); + break; + } + } + + return pData->buffer; + } + + return &pData->floatArgumentRegisters.f[0]; + } + + if (!m_argIterator.GetSig()->IsReturnTypeVoid()) + { + return &pData->argumentRegisters.a[0]; + } + + return nullptr; +} + +#undef PROFILE_ENTER +#undef PROFILE_LEAVE +#undef PROFILE_TAILCALL + +#endif // PROFILING_SUPPORTED diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp index 9a0cdd4e406d37..9d058bcad3197f 100644 --- a/src/coreclr/vm/riscv64/stubs.cpp +++ b/src/coreclr/vm/riscv64/stubs.cpp @@ -1,4 +1,1253 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// +// File: stubs.cpp +// +// This file contains stub functions for unimplemented features need to +// run on the ARM64 platform. 
-#error "TODO-RISCV64: missing implementation" +#include "common.h" +#include "dllimportcallback.h" +#include "comdelegate.h" +#include "asmconstants.h" +#include "virtualcallstub.h" +#include "jitinterface.h" +#include "ecall.h" + + +#ifndef DACCESS_COMPILE +//----------------------------------------------------------------------- +// InstructionFormat for JAL/JALR (unconditional jump) +//----------------------------------------------------------------------- +class BranchInstructionFormat : public InstructionFormat +{ + // Encoding of the VariationCode: + // bit(0) indicates whether this is a direct or an indirect jump. + // bit(1) indicates whether this is a branch with link -a.k.a call- + + public: + enum VariationCodes + { + BIF_VAR_INDIRECT = 0x00000001, + BIF_VAR_CALL = 0x00000002, + + BIF_VAR_JUMP = 0x00000000, + BIF_VAR_INDIRECT_CALL = 0x00000003 + }; + private: + BOOL IsIndirect(UINT variationCode) + { + return (variationCode & BIF_VAR_INDIRECT) != 0; + } + BOOL IsCall(UINT variationCode) + { + return (variationCode & BIF_VAR_CALL) != 0; + } + + + public: + BranchInstructionFormat() : InstructionFormat(InstructionFormat::k64) + { + LIMITED_METHOD_CONTRACT; + } + + virtual UINT GetSizeOfInstruction(UINT refSize, UINT variationCode) + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(refSize == InstructionFormat::k64); + + if (IsIndirect(variationCode)) + return 16; + else + return 12; + } + + virtual UINT GetSizeOfData(UINT refSize, UINT variationCode) + { + WRAPPER_NO_CONTRACT; + return 8; + } + + + virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode) + { + WRAPPER_NO_CONTRACT; + return 0; + } + + virtual BOOL CanReach(UINT refSize, UINT variationCode, BOOL fExternal, INT_PTR offset) + { + if (fExternal) + { + // Note that the parameter 'offset' is not an offset but the target address itself (when fExternal is true) + return (refSize == InstructionFormat::k64); + } + else + { + return ((offset >= -0x80000000L && offset <= 0x7fffffff) || (refSize == InstructionFormat::k64)); + } + } + + virtual VOID EmitInstruction(UINT refSize, __int64 fixedUpReference, BYTE *pOutBufferRX, BYTE *pOutBufferRW, UINT variationCode, BYTE *pDataBuffer) + { + LIMITED_METHOD_CONTRACT; + + if (IsIndirect(variationCode)) + { + _ASSERTE(((UINT_PTR)pDataBuffer & 7) == 0); + + __int64 dataOffset = pDataBuffer - pOutBufferRW; + + if ((dataOffset < -(0x80000000L)) || (dataOffset > 0x7fffffff)) + COMPlusThrow(kNotSupportedException); + + UINT16 imm12 = (UINT16)(0xFFF & dataOffset); + //auipc t1, dataOffset[31:12] + //ld t1, t1, dataOffset[11:0] + //ld t1, t1, 0 + //jalr x0/1, t1,0 + + *(DWORD*)pOutBufferRW = 0x00000317 | (((dataOffset + 0x800) >> 12) << 12); // auipc t1, dataOffset[31:12] + *(DWORD*)(pOutBufferRW + 4) = 0x00033303 | (imm12 << 20); // ld t1, t1, dataOffset[11:0] + *(DWORD*)(pOutBufferRW + 8) = 0x00033303; // ld t1, 0(t1) + if (IsCall(variationCode)) + { + *(DWORD*)(pOutBufferRW + 12) = 0x000300e7; // jalr ra, t1, 0 + } + else + { + *(DWORD*)(pOutBufferRW + 12) = 0x00030067 ;// jalr x0, t1,0 + } + + *((__int64*)pDataBuffer) = fixedUpReference + (__int64)pOutBufferRX; + } + else + { + _ASSERTE(((UINT_PTR)pDataBuffer & 7) == 0); + + __int64 dataOffset = pDataBuffer - pOutBufferRW; + + if ((dataOffset < -(0x80000000L)) || (dataOffset > 0x7fffffff)) + COMPlusThrow(kNotSupportedException); + + UINT16 imm12 = (UINT16)(0xFFF & dataOffset); + //auipc t1, dataOffset[31:12] + //ld t1, t1, dataOffset[11:0] + //jalr x0/1, t1,0 + + *(DWORD*)pOutBufferRW = 0x00000317 | (((dataOffset + 0x800) >> 12) << 12);// 
auipc t1, dataOffset[31:12] + *(DWORD*)(pOutBufferRW + 4) = 0x00033303 | (imm12 << 20); // ld t1, t1, dataOffset[11:0] + if (IsCall(variationCode)) + { + *(DWORD*)(pOutBufferRW + 8) = 0x000300e7; // jalr ra, t1, 0 + } + else + { + *(DWORD*)(pOutBufferRW + 8) = 0x00030067 ;// jalr x0, t1,0 + } + + if (!ClrSafeInt<__int64>::addition(fixedUpReference, (__int64)pOutBufferRX, fixedUpReference)) + COMPlusThrowArithmetic(); + *((__int64*)pDataBuffer) = fixedUpReference; + } + } +}; + +static BYTE gBranchIF[sizeof(BranchInstructionFormat)]; + +#endif + +void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD) +{ + pRD->volatileCurrContextPointers.R0 = NULL; + pRD->volatileCurrContextPointers.A0 = NULL; + pRD->volatileCurrContextPointers.A1 = NULL; + pRD->volatileCurrContextPointers.A2 = NULL; + pRD->volatileCurrContextPointers.A3 = NULL; + pRD->volatileCurrContextPointers.A4 = NULL; + pRD->volatileCurrContextPointers.A5 = NULL; + pRD->volatileCurrContextPointers.A6 = NULL; + pRD->volatileCurrContextPointers.A7 = NULL; + pRD->volatileCurrContextPointers.T0 = NULL; + pRD->volatileCurrContextPointers.T1 = NULL; + pRD->volatileCurrContextPointers.T2 = NULL; + pRD->volatileCurrContextPointers.T3 = NULL; + pRD->volatileCurrContextPointers.T4 = NULL; + pRD->volatileCurrContextPointers.T5 = NULL; + pRD->volatileCurrContextPointers.T6 = NULL; +} + +void LazyMachState::unwindLazyState(LazyMachState* baseState, + MachState* unwoundstate, + DWORD threadId, + int funCallDepth, + HostCallPreference hostCallPreference) +{ + T_CONTEXT context; + T_KNONVOLATILE_CONTEXT_POINTERS nonVolContextPtrs; + + context.ContextFlags = 0; // Read by PAL_VirtualUnwind. + + context.Fp = unwoundstate->captureCalleeSavedRegisters[0] = baseState->captureCalleeSavedRegisters[0]; + context.S1 = unwoundstate->captureCalleeSavedRegisters[1] = baseState->captureCalleeSavedRegisters[1]; + context.S2 = unwoundstate->captureCalleeSavedRegisters[2] = baseState->captureCalleeSavedRegisters[2]; + context.S3 = unwoundstate->captureCalleeSavedRegisters[3] = baseState->captureCalleeSavedRegisters[3]; + context.S4 = unwoundstate->captureCalleeSavedRegisters[4] = baseState->captureCalleeSavedRegisters[4]; + context.S5 = unwoundstate->captureCalleeSavedRegisters[5] = baseState->captureCalleeSavedRegisters[5]; + context.S6 = unwoundstate->captureCalleeSavedRegisters[6] = baseState->captureCalleeSavedRegisters[6]; + context.S7 = unwoundstate->captureCalleeSavedRegisters[7] = baseState->captureCalleeSavedRegisters[7]; + context.S8 = unwoundstate->captureCalleeSavedRegisters[8] = baseState->captureCalleeSavedRegisters[8]; + context.S9 = unwoundstate->captureCalleeSavedRegisters[9] = baseState->captureCalleeSavedRegisters[9]; + context.S10 = unwoundstate->captureCalleeSavedRegisters[10] = baseState->captureCalleeSavedRegisters[10]; + context.S11 = unwoundstate->captureCalleeSavedRegisters[11] = baseState->captureCalleeSavedRegisters[11]; + context.Gp = unwoundstate->captureCalleeSavedRegisters[12] = baseState->captureCalleeSavedRegisters[12]; + context.Tp = unwoundstate->captureCalleeSavedRegisters[13] = baseState->captureCalleeSavedRegisters[13]; + context.Ra = NULL; // Filled by the unwinder + + context.Sp = baseState->captureSp; + context.Pc = baseState->captureIp; + +#if !defined(DACCESS_COMPILE) + // For DAC, if we get here, it means that the LazyMachState is uninitialized and we have to unwind it. + // The API we use to unwind in DAC is StackWalk64(), which does not support the context pointers. 
+ // + // Restore the integer registers to KNONVOLATILE_CONTEXT_POINTERS to be used for unwinding. + nonVolContextPtrs.Fp = &unwoundstate->captureCalleeSavedRegisters[0]; + nonVolContextPtrs.S1 = &unwoundstate->captureCalleeSavedRegisters[1]; + nonVolContextPtrs.S2 = &unwoundstate->captureCalleeSavedRegisters[2]; + nonVolContextPtrs.S3 = &unwoundstate->captureCalleeSavedRegisters[3]; + nonVolContextPtrs.S4 = &unwoundstate->captureCalleeSavedRegisters[4]; + nonVolContextPtrs.S5 = &unwoundstate->captureCalleeSavedRegisters[5]; + nonVolContextPtrs.S6 = &unwoundstate->captureCalleeSavedRegisters[6]; + nonVolContextPtrs.S7 = &unwoundstate->captureCalleeSavedRegisters[7]; + nonVolContextPtrs.S8 = &unwoundstate->captureCalleeSavedRegisters[8]; + nonVolContextPtrs.S9 = &unwoundstate->captureCalleeSavedRegisters[9]; + nonVolContextPtrs.S10 = &unwoundstate->captureCalleeSavedRegisters[10]; + nonVolContextPtrs.S11 = &unwoundstate->captureCalleeSavedRegisters[11]; + nonVolContextPtrs.Gp = &unwoundstate->captureCalleeSavedRegisters[12]; + nonVolContextPtrs.Tp = &unwoundstate->captureCalleeSavedRegisters[13]; + nonVolContextPtrs.Ra = NULL; // Filled by the unwinder + +#endif // DACCESS_COMPILE + + LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK LazyMachState::unwindLazyState(ip:%p,sp:%p)\n", baseState->captureIp, baseState->captureSp)); + + PCODE pvControlPc; + + do { + +#ifndef TARGET_UNIX + pvControlPc = Thread::VirtualUnwindCallFrame(&context, &nonVolContextPtrs); +#else // !TARGET_UNIX +#ifdef DACCESS_COMPILE + HRESULT hr = DacVirtualUnwind(threadId, &context, &nonVolContextPtrs); + if (FAILED(hr)) + { + DacError(hr); + } +#else // DACCESS_COMPILE + BOOL success = PAL_VirtualUnwind(&context, &nonVolContextPtrs); + if (!success) + { + _ASSERTE(!"unwindLazyState: Unwinding failed"); + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); + } +#endif // DACCESS_COMPILE + pvControlPc = GetIP(&context); +#endif // !TARGET_UNIX + + if (funCallDepth > 0) + { + funCallDepth--; + if (funCallDepth == 0) + break; + } + else + { + // Determine whether given IP resides in JITted code. (It returns nonzero in that case.) + // Use it now to see if we've unwound to managed code yet. + BOOL fFailedReaderLock = FALSE; + BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock); + if (fFailedReaderLock) + { + // We don't know if we would have been able to find a JIT + // manager, because we couldn't enter the reader lock without + // yielding (and our caller doesn't want us to yield). So abort + // now. 
+ + // Invalidate the lazyState we're returning, so the caller knows + // we aborted before we could fully unwind + unwoundstate->_isValid = false; + return; + } + + if (fIsManagedCode) + break; + + } + } while (true); + +#ifdef TARGET_UNIX + unwoundstate->captureCalleeSavedRegisters[0] = context.Fp; + unwoundstate->captureCalleeSavedRegisters[1] = context.S1; + unwoundstate->captureCalleeSavedRegisters[2] = context.S2; + unwoundstate->captureCalleeSavedRegisters[3] = context.S3; + unwoundstate->captureCalleeSavedRegisters[4] = context.S4; + unwoundstate->captureCalleeSavedRegisters[5] = context.S5; + unwoundstate->captureCalleeSavedRegisters[6] = context.S6; + unwoundstate->captureCalleeSavedRegisters[7] = context.S7; + unwoundstate->captureCalleeSavedRegisters[8] = context.S8; + unwoundstate->captureCalleeSavedRegisters[9] = context.S9; + unwoundstate->captureCalleeSavedRegisters[10] = context.S10; + unwoundstate->captureCalleeSavedRegisters[11] = context.S11; + unwoundstate->captureCalleeSavedRegisters[12] = context.Gp; + unwoundstate->captureCalleeSavedRegisters[13] = context.Tp; +#endif + +#ifdef DACCESS_COMPILE + // For DAC builds, we update the registers directly since we dont have context pointers + unwoundstate->captureCalleeSavedRegisters[0] = context.Fp; + unwoundstate->captureCalleeSavedRegisters[1] = context.S1; + unwoundstate->captureCalleeSavedRegisters[2] = context.S2; + unwoundstate->captureCalleeSavedRegisters[3] = context.S3; + unwoundstate->captureCalleeSavedRegisters[4] = context.S4; + unwoundstate->captureCalleeSavedRegisters[5] = context.S5; + unwoundstate->captureCalleeSavedRegisters[6] = context.S6; + unwoundstate->captureCalleeSavedRegisters[7] = context.S7; + unwoundstate->captureCalleeSavedRegisters[8] = context.S8; + unwoundstate->captureCalleeSavedRegisters[9] = context.S9; + unwoundstate->captureCalleeSavedRegisters[10] = context.S10; + unwoundstate->captureCalleeSavedRegisters[11] = context.S11; + unwoundstate->captureCalleeSavedRegisters[12] = context.Gp; + unwoundstate->captureCalleeSavedRegisters[13] = context.Tp; +#else // !DACCESS_COMPILE + // For non-DAC builds, update the register state from context pointers + unwoundstate->ptrCalleeSavedRegisters[0] = nonVolContextPtrs.Fp; + unwoundstate->ptrCalleeSavedRegisters[1] = nonVolContextPtrs.S1; + unwoundstate->ptrCalleeSavedRegisters[2] = nonVolContextPtrs.S2; + unwoundstate->ptrCalleeSavedRegisters[3] = nonVolContextPtrs.S3; + unwoundstate->ptrCalleeSavedRegisters[4] = nonVolContextPtrs.S4; + unwoundstate->ptrCalleeSavedRegisters[5] = nonVolContextPtrs.S5; + unwoundstate->ptrCalleeSavedRegisters[6] = nonVolContextPtrs.S6; + unwoundstate->ptrCalleeSavedRegisters[7] = nonVolContextPtrs.S7; + unwoundstate->ptrCalleeSavedRegisters[8] = nonVolContextPtrs.S8; + unwoundstate->ptrCalleeSavedRegisters[9] = nonVolContextPtrs.S9; + unwoundstate->ptrCalleeSavedRegisters[10] = nonVolContextPtrs.S10; + unwoundstate->ptrCalleeSavedRegisters[11] = nonVolContextPtrs.S11; + unwoundstate->ptrCalleeSavedRegisters[12] = nonVolContextPtrs.Gp; + unwoundstate->ptrCalleeSavedRegisters[13] = nonVolContextPtrs.Tp; +#endif // DACCESS_COMPILE + + unwoundstate->_pc = context.Pc; + unwoundstate->_sp = context.Sp; + + unwoundstate->_isValid = TRUE; +} + +void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + SUPPORTS_DAC; + } + CONTRACTL_END; + + pRD->IsCallerContextValid = FALSE; + pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. 
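+    // Index layout of captureCalleeSavedRegisters / ptrCalleeSavedRegisters assumed by the
+    // copies in this file (derived from the assignments above):
+    //   [0] = Fp (s0), [1]..[11] = S1..S11, [12] = Gp, [13] = Tp.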
+ + // + // Copy the saved state from the frame to the current context. + // + + LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState._pc, m_MachState._sp)); + + #if defined(DACCESS_COMPILE) + // For DAC, we may get here when the HMF is still uninitialized. + // So we may need to unwind here. + if (!m_MachState.isValid()) + { + // This allocation throws on OOM. + MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true); + + InsureInit(false, pUnwoundState); + + pRD->pCurrentContext->Pc = pRD->ControlPC = pUnwoundState->_pc; + pRD->pCurrentContext->Sp = pRD->SP = pUnwoundState->_sp; + pRD->pCurrentContext->Fp = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[0]); + pRD->pCurrentContext->S1 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[1]); + pRD->pCurrentContext->S2 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[2]); + pRD->pCurrentContext->S3 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[3]); + pRD->pCurrentContext->S4 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[4]); + pRD->pCurrentContext->S5 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[5]); + pRD->pCurrentContext->S6 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[6]); + pRD->pCurrentContext->S7 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[7]); + pRD->pCurrentContext->S8 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[8]); + pRD->pCurrentContext->S9 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[9]); + pRD->pCurrentContext->S10 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[10]); + pRD->pCurrentContext->S11 = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[11]); + pRD->pCurrentContext->Gp = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[12]); + pRD->pCurrentContext->Tp = (DWORD64)(pUnwoundState->captureCalleeSavedRegisters[13]); + pRD->pCurrentContext->Ra = NULL; // Unwind again to get Caller's PC + + pRD->pCurrentContextPointers->Fp = pUnwoundState->ptrCalleeSavedRegisters[0]; + pRD->pCurrentContextPointers->S1 = pUnwoundState->ptrCalleeSavedRegisters[1]; + pRD->pCurrentContextPointers->S2 = pUnwoundState->ptrCalleeSavedRegisters[2]; + pRD->pCurrentContextPointers->S3 = pUnwoundState->ptrCalleeSavedRegisters[3]; + pRD->pCurrentContextPointers->S4 = pUnwoundState->ptrCalleeSavedRegisters[4]; + pRD->pCurrentContextPointers->S5 = pUnwoundState->ptrCalleeSavedRegisters[5]; + pRD->pCurrentContextPointers->S6 = pUnwoundState->ptrCalleeSavedRegisters[6]; + pRD->pCurrentContextPointers->S7 = pUnwoundState->ptrCalleeSavedRegisters[7]; + pRD->pCurrentContextPointers->S8 = pUnwoundState->ptrCalleeSavedRegisters[8]; + pRD->pCurrentContextPointers->S9 = pUnwoundState->ptrCalleeSavedRegisters[9]; + pRD->pCurrentContextPointers->S10 = pUnwoundState->ptrCalleeSavedRegisters[10]; + pRD->pCurrentContextPointers->S11 = pUnwoundState->ptrCalleeSavedRegisters[11]; + pRD->pCurrentContextPointers->Gp = pUnwoundState->ptrCalleeSavedRegisters[12]; + pRD->pCurrentContextPointers->Tp = pUnwoundState->ptrCalleeSavedRegisters[13]; + pRD->pCurrentContextPointers->Ra = NULL; + return; + } +#endif // DACCESS_COMPILE + + // reset pContext; it's only valid for active (top-most) frame + pRD->pContext = NULL; + pRD->ControlPC = GetReturnAddress(); // m_MachState._pc; + pRD->SP = (DWORD64)(size_t)m_MachState._sp; + + pRD->pCurrentContext->Pc = pRD->ControlPC; + pRD->pCurrentContext->Sp = pRD->SP; + +#ifdef TARGET_UNIX + pRD->pCurrentContext->Fp = 
m_MachState.ptrCalleeSavedRegisters[0] ? *m_MachState.ptrCalleeSavedRegisters[0] : m_MachState.captureCalleeSavedRegisters[0]; + pRD->pCurrentContext->S1 = m_MachState.ptrCalleeSavedRegisters[1] ? *m_MachState.ptrCalleeSavedRegisters[1] : m_MachState.captureCalleeSavedRegisters[1]; + pRD->pCurrentContext->S2 = m_MachState.ptrCalleeSavedRegisters[2] ? *m_MachState.ptrCalleeSavedRegisters[2] : m_MachState.captureCalleeSavedRegisters[2]; + pRD->pCurrentContext->S3 = m_MachState.ptrCalleeSavedRegisters[3] ? *m_MachState.ptrCalleeSavedRegisters[3] : m_MachState.captureCalleeSavedRegisters[3]; + pRD->pCurrentContext->S4 = m_MachState.ptrCalleeSavedRegisters[4] ? *m_MachState.ptrCalleeSavedRegisters[4] : m_MachState.captureCalleeSavedRegisters[4]; + pRD->pCurrentContext->S5 = m_MachState.ptrCalleeSavedRegisters[5] ? *m_MachState.ptrCalleeSavedRegisters[5] : m_MachState.captureCalleeSavedRegisters[5]; + pRD->pCurrentContext->S6 = m_MachState.ptrCalleeSavedRegisters[6] ? *m_MachState.ptrCalleeSavedRegisters[6] : m_MachState.captureCalleeSavedRegisters[6]; + pRD->pCurrentContext->S7 = m_MachState.ptrCalleeSavedRegisters[7] ? *m_MachState.ptrCalleeSavedRegisters[7] : m_MachState.captureCalleeSavedRegisters[7]; + pRD->pCurrentContext->S8 = m_MachState.ptrCalleeSavedRegisters[8] ? *m_MachState.ptrCalleeSavedRegisters[8] : m_MachState.captureCalleeSavedRegisters[8]; + pRD->pCurrentContext->S9 = m_MachState.ptrCalleeSavedRegisters[9] ? *m_MachState.ptrCalleeSavedRegisters[9] : m_MachState.captureCalleeSavedRegisters[9]; + pRD->pCurrentContext->S10 = m_MachState.ptrCalleeSavedRegisters[10] ? *m_MachState.ptrCalleeSavedRegisters[10] : m_MachState.captureCalleeSavedRegisters[10]; + pRD->pCurrentContext->S11 = m_MachState.ptrCalleeSavedRegisters[11] ? *m_MachState.ptrCalleeSavedRegisters[11] : m_MachState.captureCalleeSavedRegisters[11]; + pRD->pCurrentContext->Gp = m_MachState.ptrCalleeSavedRegisters[12] ? *m_MachState.ptrCalleeSavedRegisters[12] : m_MachState.captureCalleeSavedRegisters[12]; + pRD->pCurrentContext->Tp = m_MachState.ptrCalleeSavedRegisters[13] ? 
*m_MachState.ptrCalleeSavedRegisters[13] : m_MachState.captureCalleeSavedRegisters[13]; + pRD->pCurrentContext->Ra = NULL; // Unwind again to get Caller's PC +#else // TARGET_UNIX + pRD->pCurrentContext->Fp = *m_MachState.ptrCalleeSavedRegisters[0]; + pRD->pCurrentContext->S1 = *m_MachState.ptrCalleeSavedRegisters[1]; + pRD->pCurrentContext->S2 = *m_MachState.ptrCalleeSavedRegisters[2]; + pRD->pCurrentContext->S3 = *m_MachState.ptrCalleeSavedRegisters[3]; + pRD->pCurrentContext->S4 = *m_MachState.ptrCalleeSavedRegisters[4]; + pRD->pCurrentContext->S5 = *m_MachState.ptrCalleeSavedRegisters[5]; + pRD->pCurrentContext->S6 = *m_MachState.ptrCalleeSavedRegisters[6]; + pRD->pCurrentContext->S7 = *m_MachState.ptrCalleeSavedRegisters[7]; + pRD->pCurrentContext->S8 = *m_MachState.ptrCalleeSavedRegisters[8]; + pRD->pCurrentContext->S9 = *m_MachState.ptrCalleeSavedRegisters[9]; + pRD->pCurrentContext->S10 = *m_MachState.ptrCalleeSavedRegisters[10]; + pRD->pCurrentContext->S11 = *m_MachState.ptrCalleeSavedRegisters[11]; + pRD->pCurrentContext->Gp = *m_MachState.ptrCalleeSavedRegisters[12]; + pRD->pCurrentContext->Tp = *m_MachState.ptrCalleeSavedRegisters[13]; + pRD->pCurrentContext->Ra = NULL; // Unwind again to get Caller's PC +#endif + +#if !defined(DACCESS_COMPILE) + pRD->pCurrentContextPointers->Fp = m_MachState.ptrCalleeSavedRegisters[0]; + pRD->pCurrentContextPointers->S1 = m_MachState.ptrCalleeSavedRegisters[1]; + pRD->pCurrentContextPointers->S2 = m_MachState.ptrCalleeSavedRegisters[2]; + pRD->pCurrentContextPointers->S3 = m_MachState.ptrCalleeSavedRegisters[3]; + pRD->pCurrentContextPointers->S4 = m_MachState.ptrCalleeSavedRegisters[4]; + pRD->pCurrentContextPointers->S5 = m_MachState.ptrCalleeSavedRegisters[5]; + pRD->pCurrentContextPointers->S6 = m_MachState.ptrCalleeSavedRegisters[6]; + pRD->pCurrentContextPointers->S7 = m_MachState.ptrCalleeSavedRegisters[7]; + pRD->pCurrentContextPointers->S8 = m_MachState.ptrCalleeSavedRegisters[8]; + pRD->pCurrentContextPointers->S9 = m_MachState.ptrCalleeSavedRegisters[9]; + pRD->pCurrentContextPointers->S10 = m_MachState.ptrCalleeSavedRegisters[10]; + pRD->pCurrentContextPointers->S11 = m_MachState.ptrCalleeSavedRegisters[11]; + pRD->pCurrentContextPointers->Gp = m_MachState.ptrCalleeSavedRegisters[12]; + pRD->pCurrentContextPointers->Tp = m_MachState.ptrCalleeSavedRegisters[13]; + pRD->pCurrentContextPointers->Ra = NULL; // Unwind again to get Caller's PC +#endif + ClearRegDisplayArgumentAndScratchRegisters(pRD); +} + +#ifndef DACCESS_COMPILE +void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +#endif // !DACCESS_COMPILE + +void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pCalleeSaved) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + + +void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + + + +void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + RETURN; +} + +#ifdef FEATURE_HIJACK +TADDR ResumableFrame::GetReturnAddressPtr(void) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + LIMITED_METHOD_DAC_CONTRACT; + return dac_cast(m_Regs) + offsetof(T_CONTEXT, Pc); +} + +void ResumableFrame::UpdateRegDisplay(const 
PREGDISPLAY pRD) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + RETURN; +} + +void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} +#endif // FEATURE_HIJACK + +#ifdef FEATURE_COMINTEROP + +void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} +#endif // FEATURE_COMINTEROP + +void JIT_TailCall() +{ + _ASSERTE(!"RISCV64:NYI"); +} + +#if !defined(DACCESS_COMPILE) +EXTERN_C void JIT_UpdateWriteBarrierState(bool skipEphemeralCheck, size_t writeableOffset); + +extern "C" void STDCALL JIT_PatchedCodeStart(); +extern "C" void STDCALL JIT_PatchedCodeLast(); + +static void UpdateWriteBarrierState(bool skipEphemeralCheck) +{ + BYTE *writeBarrierCodeStart = GetWriteBarrierCodeLocation((void*)JIT_PatchedCodeStart); + BYTE *writeBarrierCodeStartRW = writeBarrierCodeStart; + ExecutableWriterHolderNoLog writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder.AssignExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + writeBarrierCodeStartRW = writeBarrierWriterHolder.GetRW(); + } + JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierCodeStartRW - writeBarrierCodeStart); +} + +void InitJITHelpers1() +{ + STANDARD_VM_CONTRACT; + + _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0); + + // Allocation helpers, faster but non-logging + if (!((TrackAllocationsEnabled()) || + (LoggingOn(LF_GCALLOC, LL_INFO10)) +#ifdef _DEBUG + || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) +#endif // _DEBUG + )) + { + if (GCHeapUtilities::UseThreadAllocationContexts()) + { + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); + + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); + } + } + + UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap()); +} + +#else +void UpdateWriteBarrierState(bool) {} +#endif // !defined(DACCESS_COMPILE) + +PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_DISPATCHER_CONTEXT * pDispatcherContext) +{ + LIMITED_METHOD_DAC_CONTRACT; + + DWORD64 stackSlot = pDispatcherContext->EstablisherFrame + REDIRECTSTUB_SP_OFFSET_CONTEXT; + PTR_PTR_CONTEXT ppContext = dac_cast((TADDR)stackSlot); + return *ppContext; +} + +PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_CONTEXT * pContext) +{ + LIMITED_METHOD_DAC_CONTRACT; + + DWORD64 stackSlot = pContext->Sp + REDIRECTSTUB_SP_OFFSET_CONTEXT; + PTR_PTR_CONTEXT ppContext = dac_cast((TADDR)stackSlot); + return *ppContext; +} + +void RedirectForThreadAbort() +{ + // ThreadAbort is not supported in .net core + throw "NYI"; +} + +#if !defined(DACCESS_COMPILE) +FaultingExceptionFrame *GetFrameFromRedirectedStubStackFrame (DISPATCHER_CONTEXT *pDispatcherContext) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + LIMITED_METHOD_CONTRACT; + + return (FaultingExceptionFrame*)NULL; +} + + +BOOL +AdjustContextForVirtualStub( + EXCEPTION_RECORD *pExceptionRecord, + CONTEXT *pContext) +{ + LIMITED_METHOD_CONTRACT; + + Thread * pThread = GetThreadNULLOk(); + + // We may not have a managed thread object. 
Example is an AV on the helper thread. + // (perhaps during StubManager::IsStub) + if (pThread == NULL) + { + return FALSE; + } + + PCODE f_IP = GetIP(pContext); + + VirtualCallStubManager::StubKind sk; + VirtualCallStubManager::FindStubManager(f_IP, &sk); + + if (sk == VirtualCallStubManager::SK_DISPATCH) + { + if (*PTR_DWORD(f_IP - 4) != DISPATCH_STUB_FIRST_DWORD) + { + _ASSERTE(!"AV in DispatchStub at unknown instruction"); + return FALSE; + } + } + else + if (sk == VirtualCallStubManager::SK_RESOLVE) + { + if (*PTR_DWORD(f_IP) != RESOLVE_STUB_FIRST_DWORD) + { + _ASSERTE(!"AV in ResolveStub at unknown instruction"); + return FALSE; + } + } + else + { + return FALSE; + } + + PCODE callsite = GetAdjustedCallAddress(GetRA(pContext)); + + // Lr must already have been saved before calling so it should not be necessary to restore Lr + + if (pExceptionRecord != NULL) + { + pExceptionRecord->ExceptionAddress = (PVOID)callsite; + } + SetIP(pContext, callsite); + + return TRUE; +} +#endif // !DACCESS_COMPILE + +UMEntryThunk * UMEntryThunk::Decode(void *pCallback) +{ + _ASSERTE(offsetof(UMEntryThunkCode, m_code) == 0); + UMEntryThunkCode * pCode = (UMEntryThunkCode*)pCallback; + + // We may be called with an unmanaged external code pointer instead. So if it doesn't look like one of our + // stubs (see UMEntryThunkCode::Encode below) then we'll return NULL. Luckily in these scenarios our + // caller will perform a hash lookup on successful return to verify our result in case random unmanaged + // code happens to look like ours. + if ((pCode->m_code[0] == 0x00009f97) && // auipc t6, 0 + (pCode->m_code[1] == 0x018fb383) && // ld t2, 24(t6) + (pCode->m_code[2] == 0x010fbf83) && // ld t6, 16(t6) + (pCode->m_code[3] == 0x000f8067)) // jalr x0, 0(t6) + { + return (UMEntryThunk*)pCode->m_pvSecretParam; + } + + return NULL; +} + +void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam) +{ + // auipc t6, 0 + // ld t2, 24(t6) + // ld t6, 16(t6) + // jalr x0, 0(t6) + // m_pTargetCode data + // m_pvSecretParam data + + m_code[0] = 0x00009f97; // auipc t6, 0 + m_code[1] = 0x018fb383; // ld t2, 24(t6) + m_code[2] = 0x010fbf83; // ld t6, 16(t6) + m_code[3] = 0x000f8067; // jalr x0, 0(t6) + + m_pTargetCode = (TADDR)pTargetCode; + m_pvSecretParam = (TADDR)pvSecretParam; + FlushInstructionCache(GetCurrentProcess(),&pEntryThunkCodeRX->m_code,sizeof(m_code)); +} + +#ifndef DACCESS_COMPILE + +void UMEntryThunkCode::Poison() +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +#endif // DACCESS_COMPILE + +#if !defined(DACCESS_COMPILE) +VOID ResetCurrentContext() +{ + LIMITED_METHOD_CONTRACT; +} +#endif + +LONG CLRNoCatchHandler(EXCEPTION_POINTERS* pExceptionInfo, PVOID pv) +{ + return EXCEPTION_CONTINUE_SEARCH; +} + +void FlushWriteBarrierInstructionCache() +{ + // this wouldn't be called in arm64, just to comply with gchelpers.h +} + +int StompWriteBarrierEphemeral(bool isRuntimeSuspended) +{ + UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap()); + return SWB_PASS; +} + +int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) +{ + UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap()); + return SWB_PASS; +} + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +int SwitchToWriteWatchBarrier(bool isRuntimeSuspended) +{ + UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap()); + return SWB_PASS; +} + +int SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) +{ + UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap()); + 
return SWB_PASS; +} +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +#ifdef DACCESS_COMPILE +BOOL GetAnyThunkTarget (T_CONTEXT *pctx, TADDR *pTarget, TADDR *pTargetMethodDesc) +{ + _ASSERTE(!"RISCV64:NYI"); + return FALSE; +} +#endif // DACCESS_COMPILE + +#ifndef DACCESS_COMPILE +// ---------------------------------------------------------------- +// StubLinkerCPU methods +// ---------------------------------------------------------------- + +void StubLinkerCPU::EmitMovConstant(IntReg target, UINT64 constant) +{ + if (0 == ((constant + 0x800) >> 32)) { + if (((constant + 0x800) >> 12) != 0) + { + Emit32((DWORD)(0x00000037 | (((constant + 0x800) >> 12) << 12) | (target << 7))); // lui target, (constant + 0x800) >> 12 + if ((constant & 0xFFF) != 0) + { + Emit32((DWORD)(0x00000013 | (constant & 0xFFF) << 20 | (target << 7) | (target << 15))); // addi target, target, constant + } + } + else + { + Emit32((DWORD)(0x00000013 | (constant & 0xFFF) << 20 | (target << 7))); // addi target, x0, constant + } + } + else + { + UINT32 upper = constant >> 32; + if (((upper + 0x800) >> 12) != 0) + { + Emit32((DWORD)(0x00000037 | (((upper + 0x800) >> 12) << 12) | (target << 7))); // lui target, (upper + 0x800) >> 12 + if ((upper & 0xFFF) != 0) + { + Emit32((DWORD)(0x00000013 | (upper & 0xFFF) << 20 | (target << 7) | (target << 15))); // addi target, target, upper + } + } + else + { + Emit32((DWORD)(0x00000013 | (upper & 0xFFF) << 20 | (target << 7))); // addi target, x0, upper + } + UINT32 lower = (constant << 32) >> 32; + UINT32 shift = 0; + for (int i = 32; i >= 0; i -= 11) + { + shift += i > 11 ? 11 : i; + UINT32 current = lower >> (i < 11 ? 0 : i - 11); + if (current != 0) + { + Emit32((DWORD)(0x00001013 | (shift << 20) | (target << 7) | (target << 15))); // slli target, target, shift + Emit32((DWORD)(0x00000013 | (current & 0x7FF) << 20 | (target << 7) | (target << 15))); // addi target, target, current + shift = 0; + } + } + if (shift) + { + Emit32((DWORD)(0x00001013 | (shift << 20) | (target << 7) | (target << 15))); // slli target, target, shift + } + } +} + +void StubLinkerCPU::EmitCmpImm(IntReg reg, int imm) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +void StubLinkerCPU::EmitCmpReg(IntReg Xn, IntReg Xm) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +void StubLinkerCPU::EmitCondFlagJump(CodeLabel * target, UINT cond) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +void StubLinkerCPU::EmitJumpRegister(IntReg regTarget) +{ + Emit32(0x00000067 | (regTarget << 15)); +} + +void StubLinkerCPU::EmitRet(IntReg Xn) +{ + Emit32((DWORD)(0x00000067 | (Xn << 15))); // jalr X0, 0(Xn) +} + +void StubLinkerCPU::EmitLoadStoreRegPairImm(DWORD flags, IntReg Xt1, IntReg Xt2, IntReg Xn, int offset) +{ + _ASSERTE((-1024 <= offset) && (offset <= 1015)); + _ASSERTE((offset & 7) == 0); + + BOOL isLoad = flags & 1; + if (isLoad) { + // ld Xt1, offset(Xn)); + Emit32((DWORD)(0x00003003 | (Xt1 << 7) | (Xn << 15) | (offset << 20))); + // ld Xt2, (offset+8)(Xn)); + Emit32((DWORD)(0x00003003 | (Xt2 << 7) | (Xn << 15) | ((offset + 8) << 20))); + } else { + // sd Xt1, offset(Xn) + Emit32((DWORD)(0x00003023 | (Xt1 << 20) | (Xn << 15) | (offset & 0xF) << 7 | (((offset >> 4) & 0xFF) << 25))); + // sd Xt1, (offset + 8)(Xn) + Emit32((DWORD)(0x00003023 | (Xt2 << 20) | (Xn << 15) | ((offset + 8) & 0xF) << 7 | ((((offset + 8) >> 4) & 0xFF) << 25))); + } +} + +void StubLinkerCPU::EmitLoadStoreRegPairImm(DWORD flags, FloatReg Ft1, FloatReg Ft2, IntReg Xn, int offset) +{ + 
_ASSERTE((-1024 <= offset) && (offset <= 1015)); + _ASSERTE((offset & 7) == 0); + + BOOL isLoad = flags & 1; + if (isLoad) { + // fld Ft, Xn, offset + Emit32((DWORD)(0x00003007 | (Xn << 15) | (Ft1 << 7) | (offset << 20))); + // fld Ft, Xn, offset + 8 + Emit32((DWORD)(0x00003007 | (Xn << 15) | (Ft2 << 7) | ((offset + 8) << 20))); + } else { + // fsd Ft, offset(Xn) + Emit32((WORD)(0x00003027 | (Xn << 15) | (Ft1 << 20) | (offset & 0xF) << 7 | ((offset >> 4) & 0xFF))); + // fsd Ft, (offset + 8)(Xn) + Emit32((WORD)(0x00003027 | (Xn << 15) | (Ft2 << 20) | ((offset + 8) & 0xF) << 7 | (((offset + 8) >> 4) & 0xFF))); + } +} + +void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset) +{ + BOOL isLoad = flags & 1; + if (isLoad) { + // ld regNum, offset(Xn); + Emit32((DWORD)(0x00003003 | (Xt << 7) | (Xn << 15) | (offset << 20))); + } else { + // sd regNum, offset(Xn) + Emit32((DWORD)(0x00003023 | (Xt << 20) | (Xn << 15) | (offset & 0xF) << 7 | (((offset >> 4) & 0xFF) << 25))); + } +} + +void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, FloatReg Ft, IntReg Xn, int offset) +{ + BOOL isLoad = flags & 1; + if (isLoad) { + // fld Ft, Xn, offset + Emit32((DWORD)(0x00003007 | (Xn << 15) | (Ft << 7) | (offset << 20))); + } else { + // fsd Ft, offset(Xn) + Emit32((WORD)(0x00003027 | (Xn << 15) | (Ft << 20) | (offset & 0xF) << 7 | ((offset >> 4) & 0xFF))); + } +} + +void StubLinkerCPU::EmitLoadFloatRegImm(FloatReg ft, IntReg base, int offset) +{ + // fld ft,base,offset + _ASSERTE(offset <= 2047 && offset >= -2048); + Emit32(0x2b800000 | (base.reg << 15) | ((offset & 0xfff)<<20) | (ft.reg << 7)); +} + +void StubLinkerCPU::EmitMovReg(IntReg Xd, IntReg Xm) +{ + Emit32(0x00000013 | (Xm << 15) | (Xd << 7)); +} + +void StubLinkerCPU::EmitSubImm(IntReg Xd, IntReg Xn, unsigned int value) +{ + _ASSERTE((0 <= value) && (value <= 0x7FF)); + Emit32((DWORD)(0x00000013 | (((~value + 0x1) & 0xFFF) << 20) | (Xn << 15) | (Xd << 7))); // addi Xd, Xn, (~value + 0x1) & 0xFFF +} + +void StubLinkerCPU::EmitAddImm(IntReg Xd, IntReg Xn, unsigned int value) +{ + _ASSERTE((0 <= value) && (value <= 0x7FF)); + Emit32((DWORD)(0x00000013 | (value << 20) | (Xn << 15) | (Xd << 7))); // addi Xd, Xn, value +} + +void StubLinkerCPU::EmitCallRegister(IntReg reg) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +void StubLinkerCPU::Init() +{ + new (gBranchIF) BranchInstructionFormat(); +} + +// Emits code to adjust arguments for static delegate target. +VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray) +{ + // On entry a0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux + // field and saved in t6. Tailcall to the target method after re-arranging the arguments + // ld t6, a0, offsetof(DelegateObject, _methodPtrAux) + EmitLoadStoreRegImm(eLOAD, IntReg(31)/*t6*/, IntReg(10)/*a0*/, DelegateObject::GetOffsetOfMethodPtrAux()); + // addi t5, a0, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into t5 used by ResolveWorkerAsmStub + EmitAddImm(30/*t5*/, 10/*a0*/, DelegateObject::GetOffsetOfMethodPtrAux()); + + int delay_index[8] = {-1}; + bool is_store = false; + UINT16 index = 0; + int i = 0; + for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++, i++) + { + if (pEntry->srcofs & ShuffleEntry::REGMASK) + { + // Source in register, destination in register + + // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose. 
+ // If source is present in register then destination may be a stack-slot. + _ASSERTE(((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK)) || !(pEntry->dstofs & (ShuffleEntry::FPREGMASK | ShuffleEntry::REGMASK))); + _ASSERTE((pEntry->dstofs & ShuffleEntry::OFSREGMASK) <= 8);//should amend for offset! + _ASSERTE((pEntry->srcofs & ShuffleEntry::OFSREGMASK) <= 8); + + if (pEntry->srcofs & ShuffleEntry::FPREGMASK) + { + _ASSERTE(!"RISCV64: not validated on riscv64!!!"); + // FirstFloatReg is 10; + int j = 10; + while (pEntry[j].srcofs & ShuffleEntry::FPREGMASK) + { + j++; + } + assert((pEntry->dstofs - pEntry->srcofs) == index); + assert(8 > index); + + int tmp_reg = 0; // f0. + ShuffleEntry* tmp_entry = pShuffleEntryArray + delay_index[0]; + while (index) + { + // fld(Ft, sp, offset); + _ASSERTE(isValidSimm12(tmp_entry->srcofs << 3)); + Emit32(0x3007 | (tmp_reg << 15) | (2 << 7/*sp*/) | ((tmp_entry->srcofs << 3) << 20)); + tmp_reg++; + index--; + tmp_entry++; + } + + j -= 1; + tmp_entry = pEntry + j; + i += j; + while (pEntry[j].srcofs & ShuffleEntry::FPREGMASK) + { + if (pEntry[j].dstofs & ShuffleEntry::FPREGMASK)// fsgnj.d fd, fs, fs + Emit32(0x22000053 | ((pEntry[j].dstofs & ShuffleEntry::OFSREGMASK) << 7) | ((pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) << 15) | ((pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) << 20)); + else //// fsd(Ft, Rn, offset); + { + _ASSERTE(isValidSimm12((pEntry[j].dstofs * sizeof(long)))); + Emit32(0x3027 | ((pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) << 20) | (2 << 15 /*sp*/) | ((pEntry[j].dstofs * sizeof(long) & 0x1f) << 7) | ((pEntry[j].dstofs * sizeof(long) & 0x7f) << 25)); + } + j--; + } + assert(tmp_reg <= 11); + /* + while (tmp_reg > 11) + { + tmp_reg--; + // fmov.d fd, fs + Emit32(0x01149800 | index | (tmp_reg << 5)); + index++; + } + */ + index = 0; + pEntry = tmp_entry; + } + else + { + // 10 is the offset of FirstGenArgReg to FirstGenReg + assert(pEntry->dstofs & ShuffleEntry::REGMASK); + assert((pEntry->dstofs & ShuffleEntry::OFSMASK) < (pEntry->srcofs & ShuffleEntry::OFSMASK)); + EmitMovReg(IntReg((pEntry->dstofs & ShuffleEntry::OFSMASK) + 10), IntReg((pEntry->srcofs & ShuffleEntry::OFSMASK) + 10)); + } + } + else if (pEntry->dstofs & ShuffleEntry::REGMASK) + { + // source must be on the stack + _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK)); + + if (pEntry->dstofs & ShuffleEntry::FPREGMASK) + { + if (!is_store) + { + delay_index[index++] = i; + continue; + } + EmitLoadFloatRegImm(FloatReg((pEntry->dstofs & ShuffleEntry::OFSREGMASK) + 10), RegSp, pEntry->srcofs * sizeof(void*)); + } + else + { + assert(pEntry->dstofs & ShuffleEntry::REGMASK); + EmitLoadStoreRegImm(eLOAD, IntReg((pEntry->dstofs & ShuffleEntry::OFSMASK) + 10), RegSp, pEntry->srcofs * sizeof(void*)); + } + } + else + { + // source must be on the stack + _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK)); + + // dest must be on the stack + _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK)); + + EmitLoadStoreRegImm(eLOAD, IntReg(29)/*t4*/, RegSp, pEntry->srcofs * sizeof(void*)); + EmitLoadStoreRegImm(eSTORE, IntReg(29)/*t4*/, RegSp, pEntry->dstofs * sizeof(void*)); + } + } + + // Tailcall to target + // jalr x0, 0(t6) + EmitJumpRegister(31); +} + +// Emits code to adjust arguments for static delegate target. 
+VOID StubLinkerCPU::EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg) +{ + STANDARD_VM_CONTRACT; + + for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++) + { + _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK); + _ASSERTE(pEntry->srcofs & ShuffleEntry::REGMASK); + _ASSERTE(!(pEntry->dstofs & ShuffleEntry::FPREGMASK)); + _ASSERTE(!(pEntry->srcofs & ShuffleEntry::FPREGMASK)); + _ASSERTE(pEntry->dstofs != ShuffleEntry::HELPERREG); + _ASSERTE(pEntry->srcofs != ShuffleEntry::HELPERREG); + + EmitMovReg(IntReg((pEntry->dstofs & ShuffleEntry::OFSREGMASK) + 4), IntReg((pEntry->srcofs & ShuffleEntry::OFSREGMASK) + 4)); + } + + MetaSig msig(pSharedMD); + ArgIterator argit(&msig); + + if (argit.HasParamType()) + { + ArgLocDesc sInstArgLoc; + argit.GetParamTypeLoc(&sInstArgLoc); + int regHidden = sInstArgLoc.m_idxGenReg; + _ASSERTE(regHidden != -1); + regHidden += 10;//NOTE: RISCV64 should start at a0=10; + + if (extraArg == NULL) + { + if (pSharedMD->RequiresInstMethodTableArg()) + { + // Unboxing stub case + // Fill param arg with methodtable of this pointer + // ld regHidden, a0, 0 + EmitLoadStoreRegImm(eLOAD, IntReg(regHidden), IntReg(10), 0); + } + } + else + { + EmitMovConstant(IntReg(regHidden), (UINT64)extraArg); + } + } + + if (extraArg == NULL) + { + // Unboxing stub case + // Address of the value type is address of the boxed instance plus sizeof(MethodDesc*). + // addi a0, a0, sizeof(MethodDesc*) + EmitAddImm(IntReg(10), IntReg(10), sizeof(MethodDesc*)); + } + + // Tail call the real target. + EmitCallManagedMethod(pSharedMD, TRUE /* tail call */); + SetTargetMethod(pSharedMD); +} + +void StubLinkerCPU::EmitCallLabel(CodeLabel *target, BOOL fTailCall, BOOL fIndirect) +{ + BranchInstructionFormat::VariationCodes variationCode = BranchInstructionFormat::VariationCodes::BIF_VAR_JUMP; + if (!fTailCall) + variationCode = static_cast(variationCode | BranchInstructionFormat::VariationCodes::BIF_VAR_CALL); + if (fIndirect) + variationCode = static_cast(variationCode | BranchInstructionFormat::VariationCodes::BIF_VAR_INDIRECT); + + EmitLabelRef(target, reinterpret_cast(gBranchIF), (UINT)variationCode); +} + +void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall) +{ + // Use direct call if possible. 
+ if (pMD->HasStableEntryPoint()) + { + EmitCallLabel(NewExternalCodeLabel((LPVOID)pMD->GetStableEntryPoint()), fTailCall, FALSE); + } + else + { + EmitCallLabel(NewExternalCodeLabel((LPVOID)pMD->GetAddrOfSlot()), fTailCall, TRUE); + } +} + + +#ifdef FEATURE_READYTORUN + +// +// Allocation of dynamic helpers +// + +#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR) + +#define BEGIN_DYNAMIC_HELPER_EMIT(size) \ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +#define END_DYNAMIC_HELPER_EMIT() \ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + +// Uses x8 as scratch register to store address of data label +// After load x8 is increment to point to next data +// only accepts positive offsets +static void LoadRegPair(BYTE* p, int reg1, int reg2, UINT32 offset) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +// Caller must ensure sufficient byte are allocated including padding (if applicable) +void DynamicHelpers::EmitHelperWithArg(BYTE*& p, size_t rxOffset, LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); +} + +PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} + +PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule) +{ + _ASSERTE(!"RISCV64: not implementation on riscv64!!!"); + return NULL; +} +#endif // FEATURE_READYTORUN + + +#endif // #ifndef DACCESS_COMPILE diff --git a/src/coreclr/vm/riscv64/thunktemplates.S b/src/coreclr/vm/riscv64/thunktemplates.S index a7cd5b6c4d2403..e078b1a7b989f3 100644 --- a/src/coreclr/vm/riscv64/thunktemplates.S +++ b/src/coreclr/vm/riscv64/thunktemplates.S @@ -4,4 +4,34 @@ #include "unixasmmacros.inc" #include "asmconstants.h" -#error "TODO-RISCV64: missing implementation" +LEAF_ENTRY StubPrecodeCode + auipc t1, 0x1 + ld t2, (StubPrecodeData__MethodDesc)(t1) + ld t1, (StubPrecodeData__Target)(t1) + jalr x0,t1,0 +LEAF_END_MARKED StubPrecodeCode + 
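+// These thunk templates assume the usual CoreCLR interleaved stub layout: each stub's data is
+// mapped one page (0x1000 bytes) after its code, so `auipc tX, 0x1` yields a pointer into the
+// data page and the StubPrecodeData__* / FixupPrecodeData__* / CallCountingStubData__* constants
+// are byte offsets within it. Where the auipc is not the first instruction of the stub, the data
+// offset is adjusted by the auipc's own code offset (e.g. the -0xa in the second FixupPrecode block).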
+LEAF_ENTRY FixupPrecodeCode + auipc t2, 0x1 + ld t2, (FixupPrecodeData__Target)(t2) + c.jr t2 + + auipc t2, 0x1 + ld t1, (FixupPrecodeData__PrecodeFixupThunk - 0xa)(t2) + ld t2, (FixupPrecodeData__MethodDesc - 0xa)(t2) + jalr x0, t1, 0 +LEAF_END_MARKED FixupPrecodeCode + +LEAF_ENTRY CallCountingStubCode + auipc t2, 0x1 + ld t3, (CallCountingStubData__RemainingCallCountCell)(t2) + lh t1, 0(t3) + addiw t1, t1, -1 + sh t1, 0(t3) + beq t1, zero, LOCAL_LABEL(CountReachedZero) + ld t1, (CallCountingStubData__TargetForMethod)(t2) + jalr x0, 0(t1) +LOCAL_LABEL(CountReachedZero): + ld t1, (CallCountingStubData__TargetForThresholdReached)(t2) + jalr x0,t1,0 +LEAF_END_MARKED CallCountingStubCode diff --git a/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp b/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp index 9a0cdd4e406d37..688c6eac727ec9 100644 --- a/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp +++ b/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp @@ -1,4 +1,567 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// +// VirtualCallStubCpu.hpp +// +#ifndef _VIRTUAL_CALL_STUB_ARM_H +#define _VIRTUAL_CALL_STUB_ARM_H -#error "TODO-RISCV64: missing implementation" +#define DISPATCH_STUB_FIRST_DWORD 0x00000e97 +#define RESOLVE_STUB_FIRST_DWORD 0x00053e03 +#define VTABLECALL_STUB_FIRST_DWORD 0x00053e83 + +#define LOOKUP_STUB_FIRST_DWORD 0x00000f97 + +#define USES_LOOKUP_STUBS 1 + +struct LookupStub +{ + inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } +private : + friend struct LookupHolder; + + DWORD _entryPoint[4]; + PCODE _resolveWorkerTarget; + size_t _token; +}; + +struct LookupHolder +{ +private: + LookupStub _stub; +public: + static void InitializeStatic() { } + + void Initialize(LookupHolder* pLookupHolderRX, PCODE resolveWorkerTarget, size_t dispatchToken) + { + // auipc t6, 0 + // ld t2, (12 + 12)(ra) + // ld t6, (4 + 12)(ra) + // jalr x0, t6, 0 + // + // _resolveWorkerTarget + // _token + + _stub._entryPoint[0] = LOOKUP_STUB_FIRST_DWORD; // auipc t6, 0 //0x00000097 + _stub._entryPoint[1] = 0x018fb383; //ld t2, 24(t6) + _stub._entryPoint[2] = 0x010fbf83; //ld t6, 16(t6) + _stub._entryPoint[3] = 0x000f8067; //jalr x0, t6, 0 + + _stub._resolveWorkerTarget = resolveWorkerTarget; + _stub._token = dispatchToken; + } + + LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } + static LookupHolder* FromLookupEntry(PCODE lookupEntry) + { + return (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) ); + } +}; + +struct DispatchStub +{ + inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + + inline size_t expectedMT() { LIMITED_METHOD_CONTRACT; return _expectedMT; } + inline PCODE implTarget() { LIMITED_METHOD_CONTRACT; return _implTarget; } + + inline TADDR implTargetSlot(EntryPointSlots::SlotType *slotTypeRef) const + { + LIMITED_METHOD_CONTRACT; + _ASSERTE(slotTypeRef != nullptr); + + *slotTypeRef = EntryPointSlots::SlotType_Executable; + return (TADDR)&_implTarget; + } + + inline PCODE failTarget() { LIMITED_METHOD_CONTRACT; return _failTarget; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(DispatchStub); } + +private: + friend struct DispatchHolder; + + DWORD _entryPoint[9]; + DWORD _pad; + size_t _expectedMT; + PCODE 
_implTarget; + PCODE _failTarget; +}; + +struct DispatchHolder +{ + static void InitializeStatic() + { + LIMITED_METHOD_CONTRACT; + + // Check that _implTarget is aligned in the DispatchHolder for backpatching + static_assert_no_msg(((offsetof(DispatchHolder, _stub) + offsetof(DispatchStub, _implTarget)) % sizeof(void *)) == 0); + } + + void Initialize(DispatchHolder* pDispatchHolderRX, PCODE implTarget, PCODE failTarget, size_t expectedMT) + { + // auipc t4,0 + // addi t4, t4, 36 + // ld t0,0(a0) ; methodTable from object in $a0 + // ld t6,0(t4) // t6 _expectedMT + // bne t6, t0, failLabel + // ld t4, 8(t4) // t4 _implTarget + // jalr x0, t4, 0 + // failLabel: + // ld t4, 16(t4) // t4 _failTarget + // jalr x0, t4, 0 + // + // + // _expectedMT + // _implTarget + // _failTarget + + _stub._entryPoint[0] = DISPATCH_STUB_FIRST_DWORD; // auipc t4,0 // 0x00000e97 + _stub._entryPoint[1] = 0x028e8e93; // addi t4, t4, 40 + _stub._entryPoint[2] = 0x00053283; // ld t0, 0(a0) //; methodTable from object in $a0 + _stub._entryPoint[3] = 0x000ebf83; // ld r6, 0(t4) // t6 _expectedMT + _stub._entryPoint[4] = 0x005f9663; // bne t6, t0, failLabel + _stub._entryPoint[5] = 0x008ebe83; // ld t4, 8(t4) // t4 _implTarget + _stub._entryPoint[6] = 0x000e8067; // jalr x0, t4, 0 + _stub._entryPoint[7] = 0x010ebe83; // ld t4, 16(t4) // t4 _failTarget + _stub._entryPoint[8] = 0x000e8067; // jalr x0, t4, 0 + + _stub._expectedMT = expectedMT; + _stub._implTarget = implTarget; + _stub._failTarget = failTarget; + } + + DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } + + static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry) + { + LIMITED_METHOD_CONTRACT; + DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchHolder, _stub) - offsetof(DispatchStub, _entryPoint) ); + return dispatchHolder; + } + +private: + DispatchStub _stub; +}; + +struct ResolveStub +{ + inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; } + inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; } + inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; } + inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } + inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; } + + inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; } + inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; } + inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); } + +private: + friend struct ResolveHolder; + const static int resolveEntryPointLen = 20; // TODO RISCV64 + const static int slowEntryPointLen = 4; + const static int failEntryPointLen = 9; + + DWORD _resolveEntryPoint[resolveEntryPointLen]; + DWORD _slowEntryPoint[slowEntryPointLen]; + DWORD _failEntryPoint[failEntryPointLen]; + UINT32 _hashedToken; + INT32* _pCounter; //Base of the Data Region + size_t _cacheAddress; // lookupCache + size_t _token; + PCODE _resolveWorkerTarget; +}; + +struct ResolveHolder +{ + static void InitializeStatic() { } + + void Initialize(ResolveHolder* pResolveHolderRX, + PCODE resolveWorkerTarget, PCODE patcherTarget, + size_t dispatchToken, UINT32 hashedToken, + void * cacheAddr, INT32 * counterAddr) + { + int n=0; + INT32 pc_offset; + +/******** Rough Convention of used in this routine + ;;ra temp base address of loading data region + ;;t5 indirection cell // TODO CHECK t8 => t5 + ;;t3 MethodTable (from 
object ref in a0), out: this._token + ;;t0 hash scratch + ;;t1 temp + ;;t2 temp + ;;t6 hash scratch // TODO CHECK R21 => t6 + ;;cachemask => [CALL_STUB_CACHE_MASK * sizeof(void*)] + + // Called directly by JITTED code + // ResolveStub._resolveEntryPoint(a0:Object*, a1 ...,a7, t8:IndirectionCellAndFlags) + // { + // MethodTable mt = a0.m_pMethTab; + // int i = ((mt + mt >> 12) ^ this._hashedToken) & _cacheMask + // ResolveCacheElem e = this._cacheAddress + i + // t1 = e = this._cacheAddress + i + // if (mt == e.pMT && this._token == e.token) + // { + // (e.target)(a0, [a1,...,a7]); + // } + // else + // { + // t3 = this._token; + // (this._slowEntryPoint)(a0, [a1,.., a7], t5, t3); + // } + // } + ********/ + + ///;;resolveEntryPoint + // Called directly by JITTED code + // ResolveStub._resolveEntryPoint(a0:Object*, a1 ...,a7, t5:IndirectionCellAndFlags) + + // ld t3, 0(a0) + _stub._resolveEntryPoint[n++] = RESOLVE_STUB_FIRST_DWORD; + // srli t0, t3, 0xc + _stub._resolveEntryPoint[n++] = 0x00ce5293; + // add t1, t3, t0 + _stub._resolveEntryPoint[n++] = 0x005e0333; + // auipc t0, 0 + _stub._resolveEntryPoint[n++] = 0x00000297; + // addi t0, t0, -16 + _stub._resolveEntryPoint[n++] = 0xff428293; + + // lw t6, 0(t0) #t6 = this._hashedToken + _stub._resolveEntryPoint[n++] = 0x0002af83 | (33 << 22); //(20+4+9)*4<<20; + _ASSERTE((ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen) == 33); + _ASSERTE((33<<2) == (offsetof(ResolveStub, _hashedToken) -offsetof(ResolveStub, _resolveEntryPoint[0]))); + + // xor t1, t1, t6 + _stub._resolveEntryPoint[n++] = 0x01f34333; + // cachemask + _ASSERTE(CALL_STUB_CACHE_MASK * sizeof(void*) == 0x7ff8); + // lui t6, 0x7ff8 + _stub._resolveEntryPoint[n++] = 0x07ff8fb7; + // srliw t6, t6, 12 + _stub._resolveEntryPoint[n++] = 0x00cfdf9b; + // and t1, t1, t6 + _stub._resolveEntryPoint[n++] = 0x01f37333; + // ld t6, 0(t0) # t6 = this._cacheAddress + _stub._resolveEntryPoint[n++] = 0x0002bf83 | (36 << 22); //(20+4+9+1+2)*4<<20; + _ASSERTE((ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+2) == 36); + _ASSERTE((36<<2) == (offsetof(ResolveStub, _cacheAddress) -offsetof(ResolveStub, _resolveEntryPoint[0]))); + // add t1, t6, t1 + _stub._resolveEntryPoint[n++] = 0x006f8333; + // ld t1, 0(t1) # t1 = e = this._cacheAddress[i] + _stub._resolveEntryPoint[n++] = 0x00033303; + + // ld t6, 0(t1) # t6 = Check mt == e.pMT; + _stub._resolveEntryPoint[n++] = 0x00033f83 | ((offsetof(ResolveCacheElem, pMT) & 0xfff) << 20); + // ld t2, 0(t0) # $t2 = this._token + _stub._resolveEntryPoint[n++] = 0x0002b383 | (38<<22);//(20+4+9+1+2+2)*4<<20; + _ASSERTE((ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+4) == 38); + _ASSERTE((38<<2) == (offsetof(ResolveStub, _token) -offsetof(ResolveStub, _resolveEntryPoint[0]))); + + // bne t6, t3, next + _stub._resolveEntryPoint[n++] = 0x01cf9a63;// | PC_REL_OFFSET(_slowEntryPoint[0], n); + + // ld t6, 0(t1) # t6 = e.token; + _stub._resolveEntryPoint[n++] = 0x00033f83 | ((offsetof(ResolveCacheElem, token) & 0xfff)<<10); + // bne t6, t2, next + _stub._resolveEntryPoint[n++] = 0x007f9663;// | PC_REL_OFFSET(_slowEntryPoint[0], n); + + pc_offset = offsetof(ResolveCacheElem, target) & 0xffffffff; + _ASSERTE(pc_offset >=0 && pc_offset%8 == 0); + // ld t3, 0(t1) # t3 = e.target; + _stub._resolveEntryPoint[n++] = 0x00033e03 | ((offsetof(ResolveCacheElem, target) & 0xfff)<<10); + // jalr x0, t3, 0 + 
_stub._resolveEntryPoint[n++] = 0x000e0067; + + _ASSERTE(n == ResolveStub::resolveEntryPointLen); + _ASSERTE(_stub._resolveEntryPoint + n == _stub._slowEntryPoint); + + // ResolveStub._slowEntryPoint(a0:MethodToken, [a1..a7], t5:IndirectionCellAndFlags) + // { + // t2 = this._token; + // this._resolveWorkerTarget(a0, [a1..a7], t5, t2); + // } +//#undef PC_REL_OFFSET +//#define PC_REL_OFFSET(_member, _index) (((INT32)(offsetof(ResolveStub, _member) - (offsetof(ResolveStub, _slowEntryPoint[_index])))) & 0xffff) + // ;;slowEntryPoint: + // ;;fall through to the slow case + + // auipc t0, 0 + _stub._slowEntryPoint[0] = 0x00000297; + // ld t6, 0(t0) # r21 = _resolveWorkerTarget; + _ASSERTE((0x14*4) == ((INT32)(offsetof(ResolveStub, _resolveWorkerTarget) - (offsetof(ResolveStub, _slowEntryPoint[0]))))); + _ASSERTE((ResolveStub::slowEntryPointLen + ResolveStub::failEntryPointLen+1+3*2) == 0x14); + _stub._slowEntryPoint[1] = 0x0002bf83 | ((0x14 * 4) << 20); + + // ld t2, 0(t0) # t2 = this._token; + _stub._slowEntryPoint[2] = 0x0002b383 | ((0x12 * 4) << 20); //(18*4=72=0x48)<<20 + _ASSERTE((ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+4)*4 == (0x12 * 4)); + _ASSERTE((0x12 * 4) == (offsetof(ResolveStub, _token) -offsetof(ResolveStub, _slowEntryPoint[0]))); + + // jalr x0, t6, 0 + _stub._slowEntryPoint[3] = 0x000f8067; + + _ASSERTE(4 == ResolveStub::slowEntryPointLen); + + // ResolveStub._failEntryPoint(a0:MethodToken, a1,.., a7, t5:IndirectionCellAndFlags) + // { + // if(--*(this._pCounter) < 0) t5 = t5 | SDF_ResolveBackPatch; + // this._resolveEntryPoint(a0, [a1..a7]); + // } +//#undef PC_REL_OFFSET +//#define PC_REL_OFFSET(_member, _index) (((INT32)(offsetof(ResolveStub, _member) - (offsetof(ResolveStub, _failEntryPoint[_index])))) & 0xffff) + //;;failEntryPoint + + // auipc t0, 0 + _stub._failEntryPoint[0] = 0x00000297; + // ld t1, 0(t0) # t1 = _pCounter; 0x2800000=((failEntryPointLen+1)*4)<<20. + _stub._failEntryPoint[1] = 0x0002b303 | 0x2800000; + _ASSERTE((((ResolveStub::failEntryPointLen+1)*4)<<20) == 0x2800000); + _ASSERTE((0x2800000>>20) == ((INT32)(offsetof(ResolveStub, _pCounter) - (offsetof(ResolveStub, _failEntryPoint[0]))))); + // lw t6, 0(t1) + _stub._failEntryPoint[2] = 0x00032f83; + // addi t6, t6, -1 + _stub._failEntryPoint[3] = 0xffff8f93; + + // sw t6, 0(t1) + _stub._failEntryPoint[4] = 0x01f32023; + + _ASSERTE(SDF_ResolveBackPatch == 0x1); + // ;; ori t5, t5, t6 >=0 ? 
SDF_ResolveBackPatch:0; + // slti t6, t6, 0 + _stub._failEntryPoint[5] = 0x000faf93; + // xori t6, t6, 1 + _stub._failEntryPoint[6] = 0x001fcf93; + // or t5, t5, t6 + _stub._failEntryPoint[7] = 0x01ff6f33; + + // j _resolveEntryPoint // pc - 128 = pc + 4 - resolveEntryPointLen * 4 - slowEntryPointLen * 4 - failEntryPointLen * 4; + _stub._failEntryPoint[8] = 0xf81ff06f; + + _ASSERTE(9 == ResolveStub::failEntryPointLen); + _stub._pCounter = counterAddr; + _stub._hashedToken = hashedToken << LOG2_PTRSIZE; + _stub._cacheAddress = (size_t) cacheAddr; + _stub._token = dispatchToken; + _stub._resolveWorkerTarget = resolveWorkerTarget; + + _ASSERTE(resolveWorkerTarget == (PCODE)ResolveWorkerChainLookupAsmStub); + _ASSERTE(patcherTarget == NULL); + +#undef DATA_OFFSET +#undef PC_REL_OFFSET +#undef Dataregionbase + } + + ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } + + static ResolveHolder* FromFailEntry(PCODE failEntry); + static ResolveHolder* FromResolveEntry(PCODE resolveEntry); +private: + ResolveStub _stub; +}; + + +/*VTableCallStub************************************************************************************** +These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed +in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the +vtable pointer, and finally jumps to the target method at a given slot in the vtable. +*/ +struct VTableCallStub +{ + friend struct VTableCallHolder; + + inline size_t size() + { + _ASSERTE(!"RISCV64:NYI"); + return 0; + } + + inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } + + inline size_t token() + { + LIMITED_METHOD_CONTRACT; + DWORD slot = *(DWORD*)(reinterpret_cast<size_t>(this) + size() - 4); + return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); + } + +private: + BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. +}; + +/* VTableCallHolders are the containers for VTableCallStubs; they provide for any alignment of +stubs as necessary. */ +struct VTableCallHolder +{ + void Initialize(unsigned slot); + + VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub*>(this); } + + static size_t GetHolderSize(unsigned slot) + { + STATIC_CONTRACT_WRAPPER; + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + int indirectionsCodeSize = (offsetOfIndirection >= 0x1000 ? 12 : 4) + (offsetAfterIndirection >= 0x1000 ? 12 : 4); + int indirectionsDataSize = (offsetOfIndirection >= 0x1000 ? 4 : 0) + (offsetAfterIndirection >= 0x1000 ? 4 : 0); + return 12 + indirectionsCodeSize + ((indirectionsDataSize > 0) ? (indirectionsDataSize + 4) : 0); + } + + static VTableCallHolder* FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; } + +private: + // VTableCallStub follows here. It is dynamically sized on allocation because it could + // use short/long instruction sizes for LDR, depending on the slot value.
+}; + + +#ifdef DECLARE_DATA + +#ifndef DACCESS_COMPILE +ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry) +{ + LIMITED_METHOD_CONTRACT; + ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) ); + return resolveHolder; +} + +ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) +{ + LIMITED_METHOD_CONTRACT; + ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) ); + return resolveHolder; +} + +void VTableCallHolder::Initialize(unsigned slot) +{ + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + + VTableCallStub* pStub = stub(); + BYTE* p = (BYTE*)pStub->entryPoint(); + + // ld t4, 0(a0) : t4 = MethodTable pointer + *(UINT32*)p = 0x00053e83; // VTABLECALL_STUB_FIRST_DWORD + p += 4; + + if ((offsetOfIndirection >= 0x1000) || (offsetAfterIndirection >= 0x1000)) + { + *(UINT32*)p = 0x00000317; // auipc t1, 0 + p += 4; + } + + if (offsetOfIndirection >= 0x1000) + { + uint dataOffset = 20 + (offsetAfterIndirection >= 0x1000 ? 12 : 4); + + // lwu t3,dataOffset(t1) + *(DWORD*)p = 0x00036e03 | ((UINT32)dataOffset << 20); p += 4; + // add t4, t4, t3 + *(DWORD*)p = 0x01ce8eb3; p += 4; + // ld t4, offsetOfIndirection(t4) + *(DWORD*)p = 0x000ebe83; p += 4; + } + else + { + // ld t4, offsetOfIndirection(t4) + *(DWORD*)p = 0x000ebe83 | ((UINT32)offsetOfIndirection << 20); p += 4; + } + + if (offsetAfterIndirection >= 0x1000) + { + uint indirectionsCodeSize = (offsetOfIndirection >= 0x1000 ? 12 : 4); + uint indirectionsDataSize = (offsetOfIndirection >= 0x1000 ? 4 : 0); + uint dataOffset = 20 + indirectionsCodeSize + indirectionsDataSize; + + // ldw t3,dataOffset(t1) + *(DWORD*)p = 0x00036e03 | ((UINT32)dataOffset << 20); p += 4; + // add t4, t4, t3 + *(DWORD*)p = 0x01ce8eb3; p += 4; + // ld t4, 0(t4) + *(DWORD*)p = 0x000ebe83; p += 4; + } + else + { + // ld t4, offsetOfIndirection(t4) + *(DWORD*)p = 0x000ebe83 | ((UINT32)offsetAfterIndirection << 20); p += 4; + } + + // jalr x0, t4, 0 + *(UINT32*)p = 0x000e8067; p += 4; + + // data labels: + if (offsetOfIndirection >= 0x1000) + { + *(UINT32*)p = (UINT32)offsetOfIndirection; + p += 4; + } + if (offsetAfterIndirection >= 0x1000) + { + *(UINT32*)p = (UINT32)offsetAfterIndirection; + p += 4; + } + + // Store the slot value here for convenience. Not a real instruction (unreachable anyways) + // NOTE: Not counted in codeSize above. + *(UINT32*)p = slot; p += 4; + + _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot)); + _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); +} + +#endif // DACCESS_COMPILE + +VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress) +{ + SUPPORTS_DAC; +#ifdef DACCESS_COMPILE + + return SK_BREAKPOINT; // Dac always uses the slower lookup + +#else + + StubKind stubKind = SK_UNKNOWN; + TADDR pInstr = PCODEToPINSTR(stubStartAddress); + + EX_TRY + { + // If stubStartAddress is completely bogus, then this might AV, + // so we protect it with SEH. An AV here is OK. 
+ AVInRuntimeImplOkayHolder AVOkay; + + DWORD firstDword = *((DWORD*) pInstr); + + if (firstDword == DISPATCH_STUB_FIRST_DWORD) // assembly of first instruction of DispatchStub : + { + stubKind = SK_DISPATCH; + } + else if (firstDword == RESOLVE_STUB_FIRST_DWORD) // assembly of first instruction of ResolveStub : + { + stubKind = SK_RESOLVE; + } + else if (firstDword == VTABLECALL_STUB_FIRST_DWORD) // assembly of first instruction of VTableCallStub : + { + stubKind = SK_VTABLECALL; + } + else if (firstDword == LOOKUP_STUB_FIRST_DWORD) // first instruction of LookupStub : + { + stubKind = SK_LOOKUP; + } + } + EX_CATCH + { + stubKind = SK_UNKNOWN; + } + EX_END_CATCH(SwallowAllExceptions); + + return stubKind; + +#endif // DACCESS_COMPILE +} + +#endif //DECLARE_DATA + +#endif // _VIRTUAL_CALL_STUB_ARM_H diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index 6b6d8c10f29daf..6abe943c47b4c4 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -303,7 +303,7 @@ bool CrawlFrame::IsGcSafe() return GetCodeManager()->IsGcSafe(&codeInfo, GetRelOffset()); } -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool CrawlFrame::HasTailCalls() { CONTRACTL { @@ -314,7 +314,7 @@ bool CrawlFrame::HasTailCalls() return GetCodeManager()->HasTailCalls(&codeInfo); } -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 inline void CrawlFrame::GotoNextFrame() { @@ -647,7 +647,7 @@ PCODE Thread::VirtualUnwindLeafCallFrame(T_CONTEXT* pContext) uControlPc = TADDR(pContext->Lr); -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) uControlPc = TADDR(pContext->Ra); #else diff --git a/src/coreclr/vm/stackwalk.h b/src/coreclr/vm/stackwalk.h index dcdd0bc79701fb..a85c85650a5778 100644 --- a/src/coreclr/vm/stackwalk.h +++ b/src/coreclr/vm/stackwalk.h @@ -305,9 +305,9 @@ class CrawlFrame */ bool IsGcSafe(); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool HasTailCalls(); -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 PREGDISPLAY GetRegisterSet() { diff --git a/src/coreclr/vm/stublink.cpp b/src/coreclr/vm/stublink.cpp index 58048bfb1ab8c2..db228659c39774 100644 --- a/src/coreclr/vm/stublink.cpp +++ b/src/coreclr/vm/stublink.cpp @@ -1891,7 +1891,6 @@ UINT StubLinker::GetStackFrameSize() return m_cbStackSpace + (2 + m_cCalleeSavedRegs + m_cIntRegArgs + m_cVecRegArgs)*sizeof(void*); } - #endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64) #endif // #ifndef DACCESS_COMPILE diff --git a/src/coreclr/vm/stubmgr.cpp b/src/coreclr/vm/stubmgr.cpp index 7376386b34079a..ffa0fa5c322bda 100644 --- a/src/coreclr/vm/stubmgr.cpp +++ b/src/coreclr/vm/stubmgr.cpp @@ -1829,7 +1829,7 @@ static BOOL IsVarargPInvokeStub(PCODE stubStartAddress) if (stubStartAddress == GetEEFuncEntryPoint(VarargPInvokeStub)) return TRUE; -#if !defined(TARGET_X86) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_X86) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) if (stubStartAddress == GetEEFuncEntryPoint(VarargPInvokeStub_RetBuffArg)) return TRUE; #endif 
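For readers following the hand-encoded ResolveStub above: the fast path that the srli/add/xor/and/ld sequence implements can be expressed in plain C++ roughly as below. This is an illustrative sketch only, not code from the patch; the struct and parameter names merely mirror the ResolveStub/ResolveCacheElem fields referenced in the diff.

#include <cstddef>
#include <cstdint>

// Sketch of a cache element; field order mirrors ResolveCacheElem (pMT, token, target).
struct ResolveCacheElemSketch { void* pMT; size_t token; void* target; };

// methodTable: the value the stub loads from [a0]; cacheBase: ResolveStub::_cacheAddress;
// cacheMask: CALL_STUB_CACHE_MASK * sizeof(void*); hashedToken: ResolveStub::_hashedToken.
inline void* ResolveFastPath(void* methodTable, size_t hashedToken, size_t token,
                             const uint8_t* cacheBase, size_t cacheMask)
{
    size_t mt = reinterpret_cast<size_t>(methodTable);
    // i = ((mt + (mt >> 12)) ^ hashedToken) & cacheMask -- what the srli/add/xor/and compute.
    size_t i = ((mt + (mt >> 12)) ^ hashedToken) & cacheMask;
    const ResolveCacheElemSketch* e =
        *reinterpret_cast<ResolveCacheElemSketch* const*>(cacheBase + i);
    if (e->pMT == methodTable && e->token == token)
        return e->target;   // hit: the real stub tail-calls e.target (jalr x0, t3, 0)
    return nullptr;         // miss: the real stub falls through to _slowEntryPoint
}

On a hit the real stub jumps to e.target with the argument registers untouched; on a miss it falls through to _slowEntryPoint, which loads _token into t2 and jumps through t6 to _resolveWorkerTarget.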
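The magic-looking OR masks such as (33 << 22), (36 << 22) and (38 << 22) in the resolve stub are just the 12-bit I-type immediate, which RISC-V places in bits [31:20] of a load instruction; because the stub is laid out as an array of 4-byte slots, shifting the slot index left by 22 equals shifting the byte offset left by 20. A small self-checking sketch of that encoding (an assumed helper for illustration, not part of the patch):

#include <cassert>
#include <cstdint>

// Patch the 12-bit immediate of a RISC-V I-type instruction (ld/lw/lwu/...).
// The immediate occupies bits [31:20] of the encoding.
constexpr uint32_t WithITypeImm(uint32_t insn, int32_t byteOffset)
{
    return insn | ((static_cast<uint32_t>(byteOffset) & 0xfff) << 20);
}

int main()
{
    // lw t6, 0(t0) encodes as 0x0002af83; the stub wants lw t6, 132(t0),
    // where 132 = 33 * 4 is the distance from the auipc result to _hashedToken.
    assert(WithITypeImm(0x0002af83, 33 * 4) == (0x0002af83 | (33u << 22)));
    return 0;
}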
diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index cce1d0ba28ffee..b67983409348e0 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -1114,11 +1114,11 @@ extern "C" void *JIT_WriteBarrier_Loc; void *JIT_WriteBarrier_Loc = 0; #endif -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) extern "C" void (*JIT_WriteBarrier_Table)(); extern "C" void *JIT_WriteBarrier_Table_Loc; void *JIT_WriteBarrier_Table_Loc = 0; -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 #ifndef TARGET_UNIX // g_TlsIndex is only used by the DAC. Disable optimizations around it to prevent it from getting optimized out. @@ -1182,17 +1182,17 @@ void InitThreadManager() SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), W("@WriteBarrier")); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier), W("@CheckedWriteBarrier")); SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier), W("@ByRefWriteBarrier")); -#endif // TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 || TARGET_RISCV64 } else @@ -1216,10 +1216,10 @@ void InitThreadManager() #else JIT_WriteBarrier_Loc = (void*)JIT_WriteBarrier; #endif -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. 
JIT_WriteBarrier_Table_Loc = (void*)&JIT_WriteBarrier_Table; -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 } #ifndef TARGET_UNIX diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 89f2f9d33f7e2c..ffff61764fc97c 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -3722,7 +3722,7 @@ void Thread::CommitGCStressInstructionUpdate() else *(DWORD*)destCodeWriterHolder.GetRW() = *(DWORD*)pbSrcCode; -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) *(DWORD*)destCodeWriterHolder.GetRW() = *(DWORD*)pbSrcCode; @@ -4881,7 +4881,7 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) { // We already have the caller context available at this point _ASSERTE(pRDT->IsCallerContextValid); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Why do we use CallerContextPointers below? // @@ -4900,7 +4900,7 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) // Note that the JIT always pushes LR even for leaf methods to make hijacking // work for them. See comment in code:Compiler::genPushCalleeSavedRegisters. -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) if (pRDT->pCallerContextPointers->Ra == &pRDT->pContext->Ra) #else if(pRDT->pCallerContextPointers->Lr == &pRDT->pContext->Lr) @@ -4939,7 +4939,7 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) // This is the case of IP being inside the method body and LR is // pushed on the stack. We get it to determine the return address // in the caller of the current non-interruptible frame. 
-#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Ra; #else pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Lr; diff --git a/src/libraries/Microsoft.NETCore.Platforms/src/runtime.compatibility.json b/src/libraries/Microsoft.NETCore.Platforms/src/runtime.compatibility.json index 926d654022362d..9021b83b2c87d4 100644 --- a/src/libraries/Microsoft.NETCore.Platforms/src/runtime.compatibility.json +++ b/src/libraries/Microsoft.NETCore.Platforms/src/runtime.compatibility.json @@ -5689,6 +5689,14 @@ "any", "base" ], + "linux-riscv64": [ + "linux-riscv64", + "linux", + "unix-riscv64", + "unix", + "any", + "base" + ], "linux-mips64": [ "linux-mips64", "linux", @@ -10499,6 +10507,12 @@ "any", "base" ], + "unix-riscv64": [ + "unix-riscv64", + "unix", + "any", + "base" + ], "unix-mips64": [ "unix-mips64", "unix", @@ -11159,4 +11173,4 @@ "any", "base" ] -} \ No newline at end of file +} diff --git a/src/libraries/Microsoft.NETCore.Platforms/src/runtime.json b/src/libraries/Microsoft.NETCore.Platforms/src/runtime.json index eb865ee6422dee..7348f16ac80b37 100644 --- a/src/libraries/Microsoft.NETCore.Platforms/src/runtime.json +++ b/src/libraries/Microsoft.NETCore.Platforms/src/runtime.json @@ -1988,6 +1988,12 @@ "unix-loongarch64" ] }, + "linux-riscv64": { + "#import": [ + "linux", + "unix-riscv64" + ] + }, "linux-mips64": { "#import": [ "linux", @@ -4057,6 +4063,11 @@ "unix" ] }, + "unix-riscv64": { + "#import": [ + "unix" + ] + }, "unix-mips64": { "#import": [ "unix" @@ -4406,4 +4417,4 @@ ] } } -} \ No newline at end of file +} diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 7fa24c7ab00f82..733e9531fb0824 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -19,7 +19,7 @@ false $(MSBuildThisFileDirectory)ILLink\ true - true + true true diff --git a/src/native/external/libunwind.cmake b/src/native/external/libunwind.cmake index 2400d28e84b6e0..73815d821588e3 100644 --- a/src/native/external/libunwind.cmake +++ b/src/native/external/libunwind.cmake @@ -198,6 +198,7 @@ set(libunwind_la_SOURCES_riscv riscv/Lget_proc_info.c riscv/Linit.c riscv/Lis_signal_frame.c riscv/Lstep.c riscv/getcontext.S + riscv/setcontext.S riscv/Lget_save_loc.c riscv/Linit_local.c riscv/Lregs.c riscv/Lcreate_addr_space.c riscv/Lglobal.c riscv/Linit_remote.c riscv/Lresume.c diff --git a/src/native/external/libunwind/CMakeLists.txt b/src/native/external/libunwind/CMakeLists.txt index 245a41bfbb35d5..2a33b37893980d 100644 --- a/src/native/external/libunwind/CMakeLists.txt +++ b/src/native/external/libunwind/CMakeLists.txt @@ -37,6 +37,11 @@ elseif ('$ENV{TARGET}' STREQUAL 'loongarch64-linux-gnu') set(arch loongarch64) add_definitions(-D__loongarch64) add_definitions(-D__linux__) +elseif ('$ENV{TARGET}' STREQUAL 'riscv64-linux-gnu') + set(TARGET_RISCV 1) + set(arch riscv) + add_definitions(-D__riscv) + add_definitions(-D__linux__) else () message(FATAL_ERROR "Unrecognize value in environment variable TARGET") endif () diff --git a/src/native/external/libunwind/src/CMakeLists.txt b/src/native/external/libunwind/src/CMakeLists.txt index b63f5b3105e86d..2925b4d711ad72 100644 --- a/src/native/external/libunwind/src/CMakeLists.txt +++ 
b/src/native/external/libunwind/src/CMakeLists.txt @@ -326,6 +326,38 @@ SET(libunwind_loongarch64_la_SOURCES_loongarch loongarch64/Gis_signal_frame.c loongarch64/Gregs.c loongarch64/Gresume.c loongarch64/Gstep.c ) +# The list of files that go into libunwind and libunwind-riscv: +SET(libunwind_la_SOURCES_riscv_common + ${libunwind_la_SOURCES_common} + riscv/is_fpreg.c + riscv/regname.c +) + +# The list of files that go into libunwind: +SET(libunwind_la_SOURCES_riscv + ${libunwind_la_SOURCES_riscv_common} + ${libunwind_la_SOURCES_local} + riscv/setcontext.S + riscv/Lapply_reg_state.c riscv/Lreg_states_iterate.c + riscv/Lcreate_addr_space.c riscv/Lget_proc_info.c + riscv/Lget_save_loc.c riscv/Lglobal.c riscv/Linit.c + riscv/Linit_local.c riscv/Linit_remote.c + riscv/Lis_signal_frame.c riscv/Lregs.c riscv/Lresume.c + riscv/Lstep.c riscv/getcontext.S +) + +SET(libunwind_riscv_la_SOURCES_riscv + ${libunwind_la_SOURCES_riscv_common} + ${libunwind_la_SOURCES_generic} + riscv/Gapply_reg_state.c riscv/Greg_states_iterate.c + riscv/Gcreate_addr_space.c riscv/Gget_proc_info.c + riscv/Gget_save_loc.c riscv/Gglobal.c riscv/Ginit.c + riscv/Ginit_local.c riscv/Ginit_remote.c + riscv/Gis_signal_frame.c riscv/Gregs.c riscv/Gresume.c + riscv/Gstash_frame.c riscv/Gstep.c +) + + if(TARGET_AARCH64) SET(libunwind_la_SOURCES ${libunwind_la_SOURCES_aarch64}) SET(libunwind_remote_la_SOURCES ${libunwind_aarch64_la_SOURCES_aarch64}) @@ -349,6 +381,11 @@ elseif(TARGET_LOONGARCH64) SET(libunwind_la_SOURCES ${libunwind_la_SOURCES_loongarch64}) SET(libunwind_remote_la_SOURCES ${libunwind_loongarch64_la_SOURCES_loongarch}) SET(libunwind_elf_la_SOURCES ${libunwind_elf64_la_SOURCES}) +elseif(TARGET_RISCV) + SET(libunwind_la_SOURCES ${libunwind_la_SOURCES_riscv}) + SET(libunwind_remote_la_SOURCES ${libunwind_riscv_la_SOURCES_riscv}) + SET(libunwind_elf_la_SOURCES ${libunwind_elf64_la_SOURCES}) + list(APPEND libunwind_setjmp_la_SOURCES riscv/siglongjmp.S) endif() add_library(libunwind diff --git a/src/tests/Directory.Build.props b/src/tests/Directory.Build.props index 437d976fa78b37..77f4f8f7191c9b 100644 --- a/src/tests/Directory.Build.props +++ b/src/tests/Directory.Build.props @@ -141,6 +141,7 @@ 64 64 64 + 64 diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128B.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128B.cpp index 97175c18a9ab32..30c97f7601c845 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128B.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128B.cpp @@ -10,7 +10,7 @@ #include typedef __m128i Vector128B; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128C.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128C.cpp index 146cfba6927ca8..3535f8117b3ea0 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128C.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128C.cpp @@ -10,7 +10,7 @@ #include typedef __m128i Vector128C; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128D.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128D.cpp index 
f3c7aa4b5f28a4..dec36ae8a02401 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128D.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128D.cpp @@ -10,7 +10,7 @@ #include typedef __m128d Vector128D; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128F.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128F.cpp index f5c4d237b77cc3..12319c75c2322e 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128F.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128F.cpp @@ -10,7 +10,7 @@ #include typedef __m128 Vector128F; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128L.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128L.cpp index 211249ccf86963..38267e3e533aa4 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128L.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128L.cpp @@ -10,7 +10,7 @@ #include typedef __m128i Vector128L; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128U.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128U.cpp index 08c6cfc4def3b1..31053317d38190 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128U.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector128U.cpp @@ -10,7 +10,7 @@ #include typedef __m128i Vector128U; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256B.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256B.cpp index f51ea87767ea16..d07640115a37a7 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256B.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256B.cpp @@ -8,7 +8,7 @@ #if defined(TARGET_XARCH) #include -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Intentionally empty #else #error Unsupported target architecture diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256C.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256C.cpp index a489455f429944..db38b9dbad3eaf 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256C.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256C.cpp @@ -8,7 +8,7 @@ #if defined(TARGET_XARCH) #include -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Intentionally empty #else #error Unsupported target architecture diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256D.cpp 
b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256D.cpp index 46d0c7d2ac72a5..94cf5a5bbbc861 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256D.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256D.cpp @@ -8,7 +8,7 @@ #if defined(TARGET_XARCH) #include -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Intentionally empty #else #error Unsupported target architecture diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256F.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256F.cpp index 3d62cd6ed1a87b..6ba7b6a4e98dd9 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256F.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256F.cpp @@ -8,7 +8,7 @@ #if defined(TARGET_XARCH) #include -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Intentionally empty #else #error Unsupported target architecture diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256L.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256L.cpp index 797385db5fa34c..e9c39910d1c219 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256L.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256L.cpp @@ -8,7 +8,7 @@ #if defined(TARGET_XARCH) #include -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Intentionally empty #else #error Unsupported target architecture diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256U.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256U.cpp index d433035d5487a7..c5b2b03a2c858c 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256U.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector256U.cpp @@ -8,7 +8,7 @@ #if defined(TARGET_XARCH) #include -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Intentionally empty #else #error Unsupported target architecture diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64B.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64B.cpp index 8563ba4e0224d8..25177acb95b1be 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64B.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64B.cpp @@ -10,7 +10,7 @@ #include typedef __m64 Vector64B; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64C.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64C.cpp index 8f597d3af46823..8e8728fc2695cd 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64C.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64C.cpp @@ -10,7 +10,7 @@ #include typedef __m64 Vector64C; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git 
a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64D.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64D.cpp index 98b05198502758..86e37ef3acd6f2 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64D.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64D.cpp @@ -10,7 +10,7 @@ #include typedef __m64 Vector64D; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64F.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64F.cpp index f4b5aad32781fe..deac901aee8efe 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64F.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64F.cpp @@ -10,7 +10,7 @@ #include typedef __m64 Vector64F; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64L.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64L.cpp index 8401ce572245ff..28293f11a5435d 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64L.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64L.cpp @@ -10,7 +10,7 @@ #include typedef __m64 Vector64L; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64U.cpp b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64U.cpp index 8a0f79faf2b733..9ae5a233bd39cb 100644 --- a/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64U.cpp +++ b/src/tests/Interop/PInvoke/Generics/GenericsNative.Vector64U.cpp @@ -10,7 +10,7 @@ #include typedef __m64 Vector64U; -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(_MSC_VER) #if defined(TARGET_ARM64) #include diff --git a/src/tests/Interop/StructMarshalling/PInvoke/MarshalStructAsParamDLL.h b/src/tests/Interop/StructMarshalling/PInvoke/MarshalStructAsParamDLL.h index 3f77f7ffac1e3b..c85b1e6f62dc05 100644 --- a/src/tests/Interop/StructMarshalling/PInvoke/MarshalStructAsParamDLL.h +++ b/src/tests/Interop/StructMarshalling/PInvoke/MarshalStructAsParamDLL.h @@ -197,7 +197,7 @@ struct OUTER3 static_assert_no_msg(sizeof(OUTER3) == 28); #endif #else // WINDOWS -#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) +#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) || defined(__riscv) union OUTER3 { struct InnerSequential arr[2]; diff --git a/src/tests/JIT/IL_Conformance/Old/Conformance_Base/conv_ovf_i8_i.ilproj b/src/tests/JIT/IL_Conformance/Old/Conformance_Base/conv_ovf_i8_i.ilproj index 8481e61c40a029..dfcd5e64ee66a9 100644 --- a/src/tests/JIT/IL_Conformance/Old/Conformance_Base/conv_ovf_i8_i.ilproj +++ b/src/tests/JIT/IL_Conformance/Old/Conformance_Base/conv_ovf_i8_i.ilproj @@ -5,6 +5,7 @@ true true true + true PdbOnly diff --git a/src/tests/run.sh b/src/tests/run.sh index 6b40dadd57854a..dd36929cfb84b6 100755 --- a/src/tests/run.sh +++ b/src/tests/run.sh @@ -95,6 
+95,9 @@ do loongarch64) buildArch="loongarch64" ;; + riscv64) + buildArch="riscv64" + ;; wasm) buildArch="wasm" ;;
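Several of the native interop tests and marshalling headers above gate RISC-V code on the compiler's predefined __riscv macro rather than on a build-system define. As a hedged illustration (not part of the patch), GCC and Clang also predefine __riscv_xlen, so a riscv64 target can be detected like this:

#include <cstdio>

int main()
{
#if defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
    std::printf("compiling for riscv64 (XLEN=%d)\n", __riscv_xlen);
#else
    std::printf("not a riscv64 target\n");
#endif
    return 0;
}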