diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7bc995a0890621..1a3a972fd17019 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -7884,7 +7884,22 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va isSimple = false; size = EA_SCALABLE; attr = size; - fmt = isVectorRegister(reg1) ? IF_SVE_IE_2A : IF_SVE_ID_2A; + if (isPredicateRegister(reg1)) + { + assert(offs == 0); + // For predicate, generate based off rsGetRsvdReg() + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + + // add rsvd, fp, #imm + emitIns_R_R_I(INS_add, EA_8BYTE, rsvdReg, reg2, imm); + // ldr p0, [rsvd, #0, mul vl] + emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0); + + return; + } + + assert(isVectorRegister(reg1)); + fmt = IF_SVE_IE_2A; // TODO-SVE: Don't assume 128bit vectors // Predicate size is vector length / 8 @@ -8138,7 +8153,24 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va isSimple = false; size = EA_SCALABLE; attr = size; - fmt = isVectorRegister(reg1) ? 
IF_SVE_JH_2A : IF_SVE_JG_2A; + + if (isPredicateRegister(reg1)) + { + assert(offs == 0); + + // For predicate, generate based off rsGetRsvdReg() + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + + // add rsvd, fp, #imm + emitIns_R_R_I(INS_add, EA_8BYTE, rsvdReg, reg2, imm); + // str p0, [rsvd, #0, mul vl] + emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0); + + return; + } + + assert(isVectorRegister(reg1)); + fmt = IF_SVE_JH_2A; // TODO-SVE: Don't assume 128bit vectors // Predicate size is vector length / 8 diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 25cfcb84452d46..25d75e885f491d 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -1205,6 +1205,11 @@ inline static bool isHighPredicateRegister(regNumber reg) return (reg >= REG_PREDICATE_HIGH_FIRST) && (reg <= REG_PREDICATE_HIGH_LAST); } +inline static bool isMaskReg(regNumber reg) +{ + return isPredicateRegister(reg); +} + inline static bool isEvenRegister(regNumber reg) { if (isGeneralRegister(reg)) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index b74d259c632b31..2b8b854c8ff2e1 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5593,7 +5593,7 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * Doing this all in one pass is 'hard'. So instead we do it in 2 basic passes: * 1. Assign all the offsets relative to the Virtual '0'. Offsets above (the * incoming arguments) are positive. Offsets below (everything else) are - * negative. This pass also calcuates the total frame size (between Caller's + * negative. This pass also calculates the total frame size (between Caller's * SP/return address and the Ambient SP). * 2. 
Figure out where to place the frame pointer, and then adjust the offsets * as needed for the final stack size and whether the offset is frame pointer diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 9f24ab0ce71e61..6a1e6520db719c 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -508,13 +508,13 @@ class RegRecord : public Referenceable { registerType = FloatRegisterType; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) else { assert(emitter::isMaskReg(reg)); registerType = MaskRegisterType; } -#endif +#endif // FEATURE_MASKED_HW_INTRINSICS regNum = reg; isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0); } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e10b0831ec826e..68482622587006 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -855,6 +855,9 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) #else killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); +#if defined(TARGET_ARM64) + killMask.RemoveRegsetForType(RBM_MSK_CALLEE_TRASH.GetPredicateRegSet(), MaskRegisterType); +#endif // TARGET_ARM64 #endif // TARGET_XARCH } #ifdef TARGET_ARM @@ -1148,8 +1151,8 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { continue; } - Interval* interval = getIntervalForLocalVar(varIndex); - const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + Interval* interval = getIntervalForLocalVar(varIndex); + const bool isCallKill = ((killMask.getLow() == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); SingleTypeRegSet regsKillMask = killMask.GetRegSetForType(interval->registerType); if (isCallKill) diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 24448466d4715a..9721b31b78cb17 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -75,8 +75,15 
@@ #define RBM_FLT_CALLEE_SAVED (RBM_V8|RBM_V9|RBM_V10|RBM_V11|RBM_V12|RBM_V13|RBM_V14|RBM_V15) #define RBM_FLT_CALLEE_TRASH (RBM_V0|RBM_V1|RBM_V2|RBM_V3|RBM_V4|RBM_V5|RBM_V6|RBM_V7|RBM_V16|RBM_V17|RBM_V18|RBM_V19|RBM_V20|RBM_V21|RBM_V22|RBM_V23|RBM_V24|RBM_V25|RBM_V26|RBM_V27|RBM_V28|RBM_V29|RBM_V30|RBM_V31) + #define RBM_LOWMASK (RBM_P0|RBM_P1|RBM_P2|RBM_P3|RBM_P4|RBM_P5|RBM_P6|RBM_P7) + #define RBM_HIGHMASK (RBM_P8|RBM_P9|RBM_P10| RBM_P11|RBM_P12|RBM_P13|RBM_P14|RBM_P15) + #define RBM_ALLMASK (RBM_LOWMASK|RBM_HIGHMASK) + + #define RBM_MSK_CALLEE_SAVED (0) + #define RBM_MSK_CALLEE_TRASH RBM_ALLMASK + #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) - #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) + #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH | RBM_MSK_CALLEE_TRASH) #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12 #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_R12 @@ -146,14 +153,6 @@ #define REG_JUMP_THUNK_PARAM REG_R12 #define RBM_JUMP_THUNK_PARAM RBM_R12 - #define RBM_LOWMASK (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3 | RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7) - #define RBM_HIGHMASK (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15) - #define RBM_ALLMASK (RBM_LOWMASK | RBM_HIGHMASK) - - // TODO-SVE: Fix when adding predicate register allocation - #define RBM_MSK_CALLEE_SAVED (0) - #define RBM_MSK_CALLEE_TRASH (0) - // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: