diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json
index b634592537a5b0..82714a408dfed2 100644
--- a/.config/dotnet-tools.json
+++ b/.config/dotnet-tools.json
@@ -15,7 +15,7 @@
]
},
"microsoft.dotnet.xharness.cli": {
- "version": "10.0.0-prerelease.25103.1",
+ "version": "10.0.0-prerelease.25164.1",
"commands": [
"xharness"
]
diff --git a/Directory.Build.props b/Directory.Build.props
index 03f77f38f6dc4b..d13e0749062eaf 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -82,9 +82,6 @@
10.0
net$(AspNetCoreAppCurrentVersion)
- net462
- net48
- net472
diff --git a/docs/area-owners.md b/docs/area-owners.md
index 7c7d703d0e4563..2aeaa8770f42dd 100644
--- a/docs/area-owners.md
+++ b/docs/area-owners.md
@@ -42,8 +42,8 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi
| area-Extensions-Primitives | @ericstj | @dotnet/area-extensions-primitives | |
| area-GC-coreclr | @mangod9 | @Maoni0 | |
| area-GC-mono | @mangod9 | @mangod9 | @BrzVlad to consult |
-| area-Host | @agocke | @jeffschwMSFT @vitek-karas @vsadov | Issues with dotnet.exe including bootstrapping, framework detection, hostfxr.dll and hostpolicy.dll |
-| area-HostModel | @agocke | @vitek-karas | |
+| area-Host | @agocke | @jeffschwMSFT @elinor-fung @vsadov | Issues with dotnet.exe including bootstrapping, framework detection, hostfxr.dll and hostpolicy.dll |
+| area-HostModel | @agocke | @elinor-fung | |
| area-ILTools-coreclr | @JulieLeeMSFT | @BruceForstall @dotnet/jit-contrib | |
| area-Infrastructure | @agocke | @jeffschwMSFT @MichaelSimons | |
| area-Infrastructure-coreclr | @agocke | @jeffschwMSFT | |
@@ -62,7 +62,7 @@ Note: Editing this file doesn't update the mapping used by `@dotnet-policy-servi
| area-ReadyToRun-coreclr | @steveisok | @dotnet/area-type-system-and-startup | |
| area-Serialization | @HongGit | @StephenMolloy @HongGit | Packages:
- System.Runtime.Serialization.Xml
- System.Runtime.Serialization.Json
- System.Private.DataContractSerialization
- System.Xml.XmlSerializer
Excluded:- System.Runtime.Serialization.Formatters
|
| area-Setup | @MichaelSimons | @NikolaMilosavljevic | Distro-specific (Linux, Mac and Windows) setup packages and msi files |
-| area-Single-File | @agocke | @vitek-karas @vsadov | |
+| area-Single-File | @agocke | @elinor-fung @vsadov | |
| area-Snap | @MichaelSimons | @NikolaMilosavljevic @leecow @MichaelSimons | |
| area-System.Buffers | @jeffhandley | @dotnet/area-system-buffers | |
| area-System.ClientModel | @terrajobst | @dotnet/fxdc | Bugs and feature requests should go to https://github.com/Azure/azure-sdk-for-net/issues. We don't own the code, but FXDC reviews changes to determine overlap with other `System` concepts. The Azure SDK team will post API updates in this repo for us to review. |
diff --git a/docs/coding-guidelines/project-guidelines.md b/docs/coding-guidelines/project-guidelines.md
index 35059779fa370b..0006e111fa1a83 100644
--- a/docs/coding-guidelines/project-guidelines.md
+++ b/docs/coding-guidelines/project-guidelines.md
@@ -69,7 +69,7 @@ When building an individual project the `BuildTargetFramework` and `TargetOS` wi
## Supported full build settings
- .NET Core latest on current OS (default) -> `$(NetCoreAppCurrent)-[RunningOS]`
-- .NET Framework latest -> `net48`
+- .NET Framework latest -> `net481`
# Library project guidelines
diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md
index 9b69c69e67d82b..62e75ab0f7a125 100644
--- a/docs/design/coreclr/botr/clr-abi.md
+++ b/docs/design/coreclr/botr/clr-abi.md
@@ -452,7 +452,7 @@ The code this finally returns to looks like this:
In this case, it zeros out the ShadowSP slot that it previously set to 0xFC, then jumps to the address that is the actual target of the leave from the finally.
-The JIT does this "end finally restore" by creating a GT_END_LFIN tree node, with the appropriate stack level as an operand, that generates this code.
+The JIT does this "end finally restore" by creating a GT_END_LFIN tree node, with the appropriate EH region ID as an operand, that generates this code.
In the case of an exceptional 'finally' invocation, the VM sets up the 'return address' to whatever address it wants the JIT to return to.
@@ -476,7 +476,7 @@ The VM walks the ShadowSP slots in the function `GetHandlerFrameInfo()`, and set
An aside on the JIT implementation for x86.
-The JIT creates BBJ_CALLFINALLY/BBJ_ALWAYS pairs for calling the 'finally' clause. The BBJ_CALLFINALLY block will have a series of CORINFO_JIT_ENDCATCH calls appended at the end, if we need to "leave" a series of nested catches before calling the finally handler (due to a single 'leave' opcode attempting to leave multiple levels of different types of handlers). Then, a GT_END_LFIN statement with the finally clause handler nesting level as an argument is added to the step block where the finally returns to. This is used to generate code to zero out the appropriate level of the ShadowSP slot array after the finally has been executed. The BBJ_CALLFINALLY block itself generates the code to insert the 0xFC value into the ShadowSP slot array. If the 'finally' is invoked by the VM, in exceptional cases, then the VM itself updates the ShadowSP slot array before invoking the 'finally'.
+The JIT creates BBJ_CALLFINALLY/BBJ_ALWAYS pairs for calling the 'finally' clause. The BBJ_CALLFINALLY block will have a series of CORINFO_JIT_ENDCATCH calls appended at the end, if we need to "leave" a series of nested catches before calling the finally handler (due to a single 'leave' opcode attempting to leave multiple levels of different types of handlers). Then, a GT_END_LFIN statement with EH region ID as an argument is added to the step block where the finally returns to. This is used to generate code to zero out the appropriate level of the ShadowSP slot array after the finally has been executed and the final EH nesting depth is known. The BBJ_CALLFINALLY block itself generates the code to insert the 0xFC value into the ShadowSP slot array. If the 'finally' is invoked by the VM, in exceptional cases, then the VM itself updates the ShadowSP slot array before invoking the 'finally'.
At the end of a finally or filter, a GT_RETFILT is inserted. For a finally, this is a TYP_VOID which is just a placeholder. For a filter, it takes an argument which evaluates to the return value from the filter. On legacy JIT, this tree triggers the generation of both the return value load (for filters) and the "funclet" exit sequence, which is either a "pop eax; jmp eax" for a finally, or a "ret" for a filter. When processing the BBJ_EHFINALLYRET or BBJ_EHFILTERRET block itself (at the end of code generation for the block), nothing is generated. In RyuJIT, the GT_RETFILT only loads up the return value (for filters) and does nothing for finally, and the block type processing after all the tree processing triggers the exit sequence to be generated. There is no real difference between these, except to centralize all "exit sequence" generation in the same place.
diff --git a/docs/design/datacontracts/PrecodeStubs.md b/docs/design/datacontracts/PrecodeStubs.md
index b9448ed0507f7c..83f593fcf2c0ad 100644
--- a/docs/design/datacontracts/PrecodeStubs.md
+++ b/docs/design/datacontracts/PrecodeStubs.md
@@ -9,7 +9,7 @@ This contract provides support for examining [precode](../coreclr/botr/method-de
TargetPointer GetMethodDescFromStubAddress(TargetCodePointer entryPoint);
```
-## Version 1
+## Version 1 and 2
Data descriptors used:
| Data Descriptor Name | Field | Meaning |
@@ -24,9 +24,12 @@ Data descriptors used:
| PrecodeMachineDescriptor | PInvokeImportPrecodeType| precode sort byte for PInvoke precode stubs, if supported |
| PrecodeMachineDescriptor | HasFixupPrecode | 1 if platform supports fixup precode stubs |
| PrecodeMachineDescriptor | FixupPrecodeType| precode sort byte for fixup precode stubs, if supported |
-| StubPrecodeData | MethodDesc | pointer to the MethodDesc associated with this stub precode |
+| PrecodeMachineDescriptor | ThisPointerRetBufPrecodeType | precode sort byte for this pointer ret buf precodes |
+| StubPrecodeData | MethodDesc | pointer to the MethodDesc associated with this stub precode (Version 1 only) |
+| StubPrecodeData | SecretParam | pointer to the MethodDesc associated with this stub precode or a second stub data pointer for other types (Version 2 only) |
| StubPrecodeData | Type | precise sort of stub precode |
| FixupPrecodeData | MethodDesc | pointer to the MethodDesc associated with this fixup precode |
+| ThisPtrRetBufPrecodeData | MethodDesc | pointer to the MethodDesc associated with the ThisPtrRetBufPrecode (Version 2 only) |
arm32 note: the `CodePointerToInstrPointerMask` is used to convert IP values that may include an arm Thumb bit (for example extracted from disassembling a call instruction or from a snapshot of the registers) into an address. On other architectures applying the mask is a no-op.
@@ -145,7 +148,10 @@ After the initial precode type is determined, for stub precodes a refined precod
internal override TargetPointer GetMethodDesc(Target target, Data.PrecodeMachineDescriptor precodeMachineDescriptor)
{
TargetPointer stubPrecodeDataAddress = InstrPointer + precodeMachineDescriptor.StubCodePageSize;
- return target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.MethodDesc */ );
+ if (ContractVersion(PrecodeStubs) == 1)
+ return target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.MethodDesc */ );
+ else
+ return target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.SecretParam */ );
}
}
@@ -170,7 +176,10 @@ After the initial precode type is determined, for stub precodes a refined precod
internal override TargetPointer GetMethodDesc(Target target, Data.PrecodeMachineDescriptor precodeMachineDescriptor)
{
- throw new NotImplementedException(); // TODO(cdac)
+ if (ContractVersion(PrecodeStubs) == 1)
+ throw new NotImplementedException(); // TODO(cdac)
+ else
+ return target.ReadPointer(target.ReadPointer (stubPrecodeDataAddress + /* offset of StubPrecodeData.SecretParam */ ) + /*offset of ThisPtrRetBufPrecodeData.MethodDesc*/);
}
}
diff --git a/docs/design/datacontracts/StackWalk.md b/docs/design/datacontracts/StackWalk.md
index d76aece42731f9..774be93cdf7ea6 100644
--- a/docs/design/datacontracts/StackWalk.md
+++ b/docs/design/datacontracts/StackWalk.md
@@ -41,11 +41,25 @@ This contract depends on the following descriptors:
| `InlinedCallFrame` | `CalleeSavedFP` | FP saved in Frame |
| `SoftwareExceptionFrame` | `TargetContext` | Context object saved in Frame |
| `SoftwareExceptionFrame` | `ReturnAddress` | Return address saved in Frame |
+| `FramedMethodFrame` | `TransitionBlockPtr` | Pointer to Frame's TransitionBlock |
+| `TransitionBlock` | `ReturnAddress` | Return address associated with the TransitionBlock |
+| `TransitionBlock` | `CalleeSavedRegisters` | Platform specific CalleeSavedRegisters struct associated with the TransitionBlock |
+| `FuncEvalFrame` | `DebuggerEvalPtr` | Pointer to the Frame's DebuggerEval object |
+| `DebuggerEval` | `TargetContext` | Context saved inside DebuggerEval |
+| `DebuggerEval` | `EvalDuringException` | Flag used in processing FuncEvalFrame |
+| `ResumableFrame` | `TargetContextPtr` | Pointer to the Frame's Target Context |
+| `FaultingExceptionFrame` | `TargetContext` | Frame's Target Context |
+| `HijackFrame` | `ReturnAddress` | Frame's stored instruction pointer |
+| `HijackFrame` | `HijackArgsPtr` | Pointer to the Frame's stored HijackArgs |
+| `HijackArgs` (amd64) | `CalleeSavedRegisters` | CalleeSavedRegisters data structure |
+| `HijackArgs` (amd64 Windows) | `Rsp` | Saved stack pointer |
+| `HijackArgs` (arm64) | For each register `r` saved in HijackArgs, `r` | Register names associated with stored register values |
+| `CalleeSavedRegisters` | For each callee saved register `r`, `r` | Register names associated with stored register values |
Global variables used:
| Global Name | Type | Purpose |
| --- | --- | --- |
-| For each FrameType ``, `##Identifier` | FrameIdentifier enum value | Identifier used to determine concrete type of Frames |
+| For each FrameType ``, `##Identifier` | `FrameIdentifier` enum value | Identifier used to determine concrete type of Frames |
Contracts used:
| Contract Name |
@@ -215,6 +229,71 @@ private static void bar()
}
```
+### Capital 'F' Frame Handling
+
+Capital 'F' Frames store context data in a number of different ways. Of the couple dozen Frame types defined in `src/coreclr/vm/frames.h` several do not store any context data or update the context, signified by `NeedsUpdateRegDisplay_Impl() == false`. Of the Frames that do update the context, several share implementations of `UpdateRegDisplay_Impl` through inheritance. This leaves us with 9 distinct mechanisms to update the context that will be detailed below. Each mechanism is referred to using the Frame class that implements the mechanism and may be used by subclasses.
+
+Most of the handlers are implemented in `BaseFrameHandler`. Platform specific components are implemented/overridden in `FrameHandler`.
+
+#### InlinedCallFrame
+
+InlinedCallFrames store and update only the IP, SP, and FP of a given context. If the stored IP (CallerReturnAddress) is 0 then the InlinedCallFrame does not have an active call and should not update the context.
+
+#### SoftwareExceptionFrame
+
+SoftwareExceptionFrames store a copy of the context struct. The IP, SP, and all ABI specified (platform specific) callee-saved registers are copied from the stored context to the working context.
+
+#### TransitionFrame
+
+TransitionFrames hold a pointer to a `TransitionBlock`. The TransitionBlock holds a return address along with a `CalleeSavedRegisters` struct which has values for all ABI specified callee-saved registers. The SP can be found using the address of the TransitionBlock. Since the TransitionBlock will be the lowest element on the stack, the SP is the address of the TransitionBlock + sizeof(TransitionBlock).
+
+When updating the context from a TransitionFrame, the IP, SP, and all ABI specified callee-saved registers are copied over.
+
+The following Frame types also use this mechanism:
+* FramedMethodFrame
+* CLRToCOMMethodFrame
+* PInvokeCallIFrame
+* PrestubMethodFrame
+* StubDispatchFrame
+* CallCountingHelperFrame
+* ExternalMethodFrame
+* DynamicHelperFrame
+
+#### FuncEvalFrame
+
+FuncEvalFrames hold a pointer to a `DebuggerEval`. The DebuggerEval holds a full context which is completely copied over to the working context when updating.
+
+#### ResumableFrame
+
+ResumableFrames hold a pointer to a context object (Note this is different from SoftwareExceptionFrames which hold the context directly). The entire context object is copied over to the working context when updating.
+
+RedirectedThreadFrames also use this mechanism.
+
+#### FaultingExceptionFrame
+
+FaultingExceptionFrames have two different implementations. One for Windows x86 and another for all other builds (with funclets).
+
+Given the cDAC does not yet support Windows x86, this version is not supported.
+
+The other version stores a context struct. To update the working context, the entire stored context is copied over. In addition, the `ContextFlags` are updated to ensure the `CONTEXT_XSTATE` bit is not set given the debug version of the contexts cannot store extended state. This bit is architecture specific.
+
+#### HijackFrame
+
+HijackFrames carry an IP (ReturnAddress) and a pointer to `HijackArgs`. All platforms update the IP and use the platform-specific HijackArgs to update further registers. The following details currently implemented platforms.
+
+* x64 - On x64, HijackArgs contains a CalleeSavedRegister struct. The saved registers values contained in the struct are copied over to the working context.
+ * Windows - On Windows, HijackArgs also contains the SP value directly which is copied over to the working context.
+ * Non-Windows - On OSes other than Windows, HijackArgs does not contain an SP value. Instead, since the HijackArgs struct lives on the stack, the SP is `&hijackArgs + sizeof(HijackArgs)`. This value is also copied over.
+* arm64 - Unlike on x64, on arm64 HijackArgs contains a list of register values instead of the CalleeSavedRegister struct. These values are copied over to the working context. The SP is fetched using the same technique as on x64 non-Windows where `SP = &hijackArgs + sizeof(HijackArgs)` and is copied over to the working context.
+
+#### TailCallFrame
+
+TailCallFrames are only used on Windows x86 which is not yet supported in the cDAC and therefore not implemented.
+
+#### HelperMethodFrame
+
+HelperMethodFrames are on the way to being removed. They are not currently supported in the cDAC.
+
### APIs
The majority of the contract's complexity is the stack walking algorithm (detailed above) implemented as part of `CreateStackWalk`.
diff --git a/docs/design/specs/runtime-async.md b/docs/design/specs/runtime-async.md
index 0929c6d931ad5d..fc229b0ac78031 100644
--- a/docs/design/specs/runtime-async.md
+++ b/docs/design/specs/runtime-async.md
@@ -49,6 +49,15 @@ Async methods support suspension using one of the following methods:
public static T Await(Task task);
[MethodImpl(MethodImplOptions.Async)]
public static T Await(ValueTask task);
+
+ [MethodImpl(MethodImplOptions.Async)]
+ public static void Await(ConfiguredTaskAwaitable configuredAwaitable);
+ [MethodImpl(MethodImplOptions.Async)]
+ public static void Await(ConfiguredValueTaskAwaitable configuredAwaitable);
+ [MethodImpl(MethodImplOptions.Async)]
+ public static T Await(ConfiguredTaskAwaitable configuredAwaitable);
+ [MethodImpl(MethodImplOptions.Async)]
+ public static T Await(ConfiguredValueTaskAwaitable configuredAwaitable);
}
}
```
diff --git a/docs/project/list-of-diagnostics.md b/docs/project/list-of-diagnostics.md
index 7bed18fa502dcc..c40d4a50b30025 100644
--- a/docs/project/list-of-diagnostics.md
+++ b/docs/project/list-of-diagnostics.md
@@ -112,7 +112,7 @@ The PR that reveals the implementation of the `
true
- true
$(ArtifactsDir)staging/
$(ArtifactsStagingDir)SymStore
diff --git a/eng/Signing.props b/eng/Signing.props
index f98f72e13c1d43..0623aaa1c18cc1 100644
--- a/eng/Signing.props
+++ b/eng/Signing.props
@@ -58,16 +58,6 @@
-
-
-
-
-
-
+
https://github.com/dotnet/source-build-reference-packages
- 232bcf31aad21949f80d6706720540b85e43fff3
+ c3d4c372a15c2de79a2f26fe2b6b3644996d8550
@@ -336,17 +336,17 @@
https://github.com/dotnet/runtime
088d199063dd1bf7fa00a445d23f93f915f84b31
-
+
https://github.com/dotnet/xharness
- cf1b2925785f504d4d52773bcab470044e35ea15
+ 6df4f3a2cd57adbde1607bba67f50613102d8bd9
-
+
https://github.com/dotnet/xharness
- cf1b2925785f504d4d52773bcab470044e35ea15
+ 6df4f3a2cd57adbde1607bba67f50613102d8bd9
-
+
https://github.com/dotnet/xharness
- cf1b2925785f504d4d52773bcab470044e35ea15
+ 6df4f3a2cd57adbde1607bba67f50613102d8bd9
https://github.com/dotnet/arcade
diff --git a/eng/Versions.props b/eng/Versions.props
index e37c9e11d95bbd..24a6a7432c10d6 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -7,12 +7,12 @@
0
0
$(MajorVersion).0.100
- 9.0.2
+ 9.0.3
8.0.$([MSBuild]::Add($([System.Version]::Parse('$(PackageVersionNet9)').Build),11))
7.0.20
6.0.36
preview
- 3
+ 4
false
release
@@ -158,9 +158,9 @@
10.0.0-beta.25126.1
10.0.0-beta.25126.1
- 10.0.0-prerelease.25103.1
- 10.0.0-prerelease.25103.1
- 10.0.0-prerelease.25103.1
+ 10.0.0-prerelease.25164.1
+ 10.0.0-prerelease.25164.1
+ 10.0.0-prerelease.25164.1
10.0.0-alpha.0.25126.1
@@ -201,7 +201,7 @@
1.0.2
2.0.4
4.18.4
- 6.7.0
+ 8.0.2
2.14.3
2.9.1
diff --git a/eng/build-analysis-configuration.json b/eng/build-analysis-configuration.json
index 06bf441873e2dc..208e753c836201 100644
--- a/eng/build-analysis-configuration.json
+++ b/eng/build-analysis-configuration.json
@@ -15,6 +15,10 @@
{
"PipelineId": 157,
"PipelineName": "runtime-llvm"
+ },
+ {
+ "PipelineId": 265,
+ "PipelineName": "runtime-nativeaot-outerloop"
}
]
}
diff --git a/eng/build.ps1 b/eng/build.ps1
index e38be81d660ecd..dd41e2cf85fbe9 100644
--- a/eng/build.ps1
+++ b/eng/build.ps1
@@ -78,7 +78,7 @@ function Get-Help() {
Write-Host "Libraries settings:"
Write-Host " -coverage Collect code coverage when testing."
- Write-Host " -framework (-f) Build framework: net10.0 or net48."
+ Write-Host " -framework (-f) Build framework: net10.0 or net481."
Write-Host " [Default: net10.0]"
Write-Host " -testnobuild Skip building tests when invoking -test."
Write-Host " -testscope Scope tests, allowed values: innerloop, outerloop, all."
diff --git a/eng/build.sh b/eng/build.sh
index 1cab7739726fa3..a4e36af19ee956 100755
--- a/eng/build.sh
+++ b/eng/build.sh
@@ -66,7 +66,7 @@ usage()
echo "Libraries settings:"
echo " --coverage Collect code coverage when testing."
- echo " --framework (-f) Build framework: net10.0 or net48."
+ echo " --framework (-f) Build framework: net10.0 or net481."
echo " [Default: net10.0]"
echo " --testnobuild Skip building tests when invoking -test."
echo " --testscope Test scope, allowed values: innerloop, outerloop, all."
@@ -542,6 +542,10 @@ fi
if [[ "$os" == "browser" ]]; then
# override default arch for Browser, we only support wasm
arch=wasm
+ # because on docker instance without swap file, MSBuild nodes need to make some room for LLVM
+ # https://github.com/dotnet/runtime/issues/113724
+ # the value is a hexadecimal percentage: 0x46 == 70%
+ export DOTNET_GCHeapHardLimitPercent="46"
fi
if [[ "$os" == "wasi" ]]; then
# override default arch for wasi, we only support wasm
diff --git a/eng/native/configureplatform.cmake b/eng/native/configureplatform.cmake
index 274ab363bf7905..b04bfc9ca1397c 100644
--- a/eng/native/configureplatform.cmake
+++ b/eng/native/configureplatform.cmake
@@ -522,7 +522,7 @@ if (CLR_CMAKE_TARGET_ANDROID OR CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET
set(CLR_CMAKE_USE_SYSTEM_ZLIB 1)
endif()
-if (NOT CLR_CMAKE_TARGET_ANDROID)
+if (NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_BROWSER)
# opt into building tools like ildasm/ilasm
set(CLR_CMAKE_BUILD_TOOLS 1)
endif()
diff --git a/eng/pipelines/common/templates/pipeline-with-resources.yml b/eng/pipelines/common/templates/pipeline-with-resources.yml
index 0eb33099d4aa8a..bdf11a5c903610 100644
--- a/eng/pipelines/common/templates/pipeline-with-resources.yml
+++ b/eng/pipelines/common/templates/pipeline-with-resources.yml
@@ -17,118 +17,118 @@ extends:
containers:
linux_arm:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm@sha256:b42b92a3a7d04f0761698680dd8601c91e74124097ab6c43f364bd420f5abe46
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm
env:
ROOTFS_DIR: /crossrootfs/arm
linux_armv6:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-cross-armv6-raspbian-10@sha256:b3292e7f26790c74f3a5d311fc8294e3886199cfa31f499f34386b948dc37b0d
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-cross-armv6-raspbian-10
env:
ROOTFS_DIR: /crossrootfs/armv6
linux_arm64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm64@sha256:e8ec62a221b9e1a07abb73eb1ddd3b86802fd50a14462142e9b13f2bf9208cd8
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm64
env:
ROOTFS_DIR: /crossrootfs/arm64
linux_musl_x64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64-musl@sha256:f244847db10686f8286961ef719957e1203142e274501be9a0fc28d44c81229c
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64-musl
env:
ROOTFS_DIR: /crossrootfs/x64
linux_musl_arm:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm-musl@sha256:9b4c7dfb39577eecb0c44128a92bc8ac779afc5d1f400e6d478998f18faa3e1d
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm-musl
env:
ROOTFS_DIR: /crossrootfs/arm
linux_musl_arm64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm64-musl@sha256:1a3ba98d92ba0242ede509deec9064df9593dc31f54cbe233d76a3475fa2897f
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-arm64-musl
env:
ROOTFS_DIR: /crossrootfs/arm64
# This container contains all required toolsets to build for Android and for Linux with bionic libc.
android:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-android-amd64@sha256:e9bb28569eebdea5122dc487874004f28b16c6168b6e02ceac1a341b002ab01e
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-android-amd64
# This container contains all required toolsets to build for Android and for Linux with bionic libc and a special layout of OpenSSL.
linux_bionic:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-android-openssl-amd64@sha256:20753705df1a6a799f26c2b5b00d400d5e261f4c60989539946d80cc92cb19d5
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-android-openssl-amd64
# This container contains all required toolsets to build for Android as well as tooling to build docker images.
android_docker:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-android-docker-amd64@sha256:b41b35c3254f975bf06d778ce457c6109cca6765926ad1e76e8fc7d5ba162698
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-android-docker-amd64
linux_x64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64@sha256:a851d98c330f4e5eaa32f694f29bfed82e8047cf90bfb8a5000aa3fbdda47b4a
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64
env:
ROOTFS_DIR: /crossrootfs/x64
linux_x86:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-x86@sha256:fa77d0239e3d511423ac85103636a666fe0da67ba2f25d4aa2044390b2662688
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-x86
env:
ROOTFS_DIR: /crossrootfs/x86
linux_x64_dev_innerloop:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04@sha256:7458abba1a433923652d04b474bc26d488064801ed7bf395c3edd8746d78b146
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04
linux_musl_x64_dev_innerloop:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.19-WithNode@sha256:e2f2dddab2466124917a0fe09c5f8bf6678dac9d6e23b364dc6042819a1125a0
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.19-WithNode
linux_x64_sanitizer:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64-sanitizer@sha256:a1b002dcb764fc63ac34f305b465cbc0461ba7ef9a2e7e0fa3b9dd0feb429182
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-amd64-sanitizer
env:
ROOTFS_DIR: /crossrootfs/x64
# We use a CentOS Stream 8 image here to test building from source on CentOS Stream 9.
SourceBuild_centos_x64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9@sha256:6725e979e408951fd2f0ce9533ea0120f29d2fc086152af2d830a1c40f49c975
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9
# AlmaLinux 8 is a RHEL 8 rebuild, so we use it to test building from source on RHEL 8.
SourceBuild_linux_x64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:almalinux-8-source-build@sha256:17fc48c23af4e41e909004cd474a6a799518d6b4d1335bb0b4fb4d01ea69cc4a
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:almalinux-8-source-build
linux_s390x:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-s390x@sha256:a6ff63ad83425b2003a7a7b4e8e0732d7cf9b12bce9e1c1ced91a76d289c5123
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-s390x
env:
ROOTFS_DIR: /crossrootfs/s390x
linux_ppc64le:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-ppc64le@sha256:8959695d5db2658e41af659b612e921a00fc490ba07fb1d4c1304724bacbe1c4
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-ppc64le
env:
ROOTFS_DIR: /crossrootfs/ppc64le
linux_riscv64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-riscv64@sha256:d64f90c040c32f36ba4e90be5a276afb6adbd2daa92528906fed90140ea17137
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-riscv64
env:
ROOTFS_DIR: /crossrootfs/riscv64
linux_loongarch64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-loongarch64@sha256:9587c99b1523cc074dd68bf6145200797fdd0d0c97b0ececf782c250ef0cfae4
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-loongarch64
env:
ROOTFS_DIR: /crossrootfs/loongarch64
debian-12-gcc14-amd64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc14-amd64@sha256:c3cf02c77cabebcfd53055315335c7a5ff46a4dde539bfdfa570c7e4ba2281c3
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc14-amd64
linux_x64_llvmaot:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8@sha256:c7f8108d3c0dcf35c258f735de42082f52415a53a75788e75c054cd593210b29
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8
browser_wasm:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-webassembly-amd64@sha256:b56cd247f05b5b1353bb1b1f2f22061d4bbff1ee0f0c8e6d49e3a382b728d0ba
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-webassembly-amd64
env:
ROOTFS_DIR: /crossrootfs/x64
wasi_wasm:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-webassembly-amd64@sha256:b56cd247f05b5b1353bb1b1f2f22061d4bbff1ee0f0c8e6d49e3a382b728d0ba
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-webassembly-amd64
env:
ROOTFS_DIR: /crossrootfs/x64
freebsd_x64:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-freebsd-14-amd64@sha256:9a13870c8778c0791e7329cf9339de2c6460b2d3bb271d5e9821d7b4ec2a3eca
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:azurelinux-3.0-net10.0-cross-freebsd-14-amd64
env:
ROOTFS_DIR: /crossrootfs/x64
tizen_armel:
- image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-cross-armel-tizen@sha256:325453b3c4d3d6cae2abc22ce2735bd92d224f20e5ab35b27758c965c4f69c8d
+ image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-cross-armel-tizen
env:
ROOTFS_DIR: /crossrootfs/armel
diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml
index 551a502ae9ed60..4171b5bcc33e57 100644
--- a/eng/pipelines/common/templates/runtimes/run-test-job.yml
+++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml
@@ -386,6 +386,9 @@ jobs:
- jitstress_isas_x86_nosse41
- jitstress_isas_x86_nosse42
- jitstress_isas_x86_nossse3
+ - jitstress_isas_x86_vectort128
+ - jitstress_isas_x86_vectort512
+ - jitstress_isas_x86_noavx512_vectort128
- jitstress_isas_1_x86_noaes
- jitstress_isas_1_x86_noavx
- jitstress_isas_1_x86_noavx2
@@ -540,7 +543,6 @@ jobs:
- jitpartialcompilation_pgo
- jitpartialcompilation_pgo_stress_random
- jitoptrepeat
- - jitoldlayout
${{ else }}:
scenarios:
- jitosr_stress
@@ -554,7 +556,6 @@ jobs:
- jitphysicalpromotion_full
- jitrlcse
- jitoptrepeat
- - jitoldlayout
${{ if in(parameters.testGroup, 'jit-cfg') }}:
scenarios:
- jitcfg
diff --git a/eng/pipelines/common/templates/wasm-library-tests.yml b/eng/pipelines/common/templates/wasm-library-tests.yml
index 51cf40074ff5bd..149215ea501937 100644
--- a/eng/pipelines/common/templates/wasm-library-tests.yml
+++ b/eng/pipelines/common/templates/wasm-library-tests.yml
@@ -80,7 +80,7 @@ jobs:
isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }}
testGroup: innerloop
nameSuffix: LibraryTests${{ parameters.nameSuffix }}
- buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:MonoEnableAssertMessages=true /p:BrowserHost=$(_hostedOs) $(_wasmRunSmokeTestsOnlyArg) $(chromeInstallArg) $(firefoxInstallArg) $(v8InstallArg) ${{ parameters.extraBuildArgs }}
+ buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true /p:MonoEnableAssertMessages=true /p:BrowserHost=$(_hostedOs) $(_wasmRunSmokeTestsOnlyArg) $(chromeInstallArg) $(firefoxInstallArg) $(v8InstallArg) /maxcpucount:1 ${{ parameters.extraBuildArgs }}
timeoutInMinutes: 240
# if !alwaysRun, then:
# if this is runtime-wasm (isWasmOnlyBuild):
diff --git a/eng/pipelines/coreclr/libraries-pgo.yml b/eng/pipelines/coreclr/libraries-pgo.yml
index 00a050da0e6210..a8f0e16b01f35c 100644
--- a/eng/pipelines/coreclr/libraries-pgo.yml
+++ b/eng/pipelines/coreclr/libraries-pgo.yml
@@ -71,4 +71,3 @@ extends:
- syntheticpgo
- syntheticpgo_blend
- jitrlcse
- - jitoldlayout
diff --git a/eng/pipelines/coreclr/superpmi-replay.yml b/eng/pipelines/coreclr/superpmi-replay.yml
index 7ec31af8732eeb..eb907287a6a0a8 100644
--- a/eng/pipelines/coreclr/superpmi-replay.yml
+++ b/eng/pipelines/coreclr/superpmi-replay.yml
@@ -1,20 +1,20 @@
-# This pipeline only runs on GitHub PRs, not on merges.
trigger: none
-# Only run on changes to the JIT directory. Don't run if the JIT-EE GUID has changed,
-# since there won't be any SuperPMI collections with the new GUID until the collection
-# pipeline completes after this PR is merged.
pr:
branches:
include:
- main
paths:
include:
- - src/coreclr/jit/*
- - src/coreclr/gcinfo/*
- - src/coreclr/tools/superpmi/*
- exclude:
- - src/coreclr/inc/jiteeversionguid.h
+ - src/coreclr/jit/lsra*.*
+
+schedules:
+- cron: "0 7 * * *"
+ displayName: Daily at 11:00 PM (UTC-8:00)
+ branches:
+ include:
+ - main
+ always: true
variables:
- template: /eng/pipelines/common/variables.yml
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml
index e47cb4996cc704..d4c755945c0372 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml
@@ -40,7 +40,7 @@ jobs:
eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
eq(variables['isRollingBuild'], true))
-# Run net48 tests on win-x64
+# Run net481 tests on win-x64
- template: /eng/pipelines/common/platform-matrix.yml
parameters:
jobTemplate: /eng/pipelines/common/global-build-job.yml
@@ -49,16 +49,16 @@ jobs:
- windows_x64
helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
jobParameters:
- framework: net48
- buildArgs: -s tools+libs+libs.tests -framework net48 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true
- nameSuffix: Libraries_NET48
+ framework: net481
+ buildArgs: -s tools+libs+libs.tests -framework net481 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true
+ nameSuffix: Libraries_NET481
timeoutInMinutes: 150
postBuildSteps:
- template: /eng/pipelines/libraries/helix.yml
parameters:
creator: dotnet-bot
- testRunNamePrefixSuffix: NET48_$(_BuildConfig)
- extraHelixArguments: /p:BuildTargetFramework=net48
+ testRunNamePrefixSuffix: NET481_$(_BuildConfig)
+ extraHelixArguments: /p:BuildTargetFramework=net481
isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
condition: >-
or(
diff --git a/eng/pipelines/libraries/helix-queues-setup.yml b/eng/pipelines/libraries/helix-queues-setup.yml
index f5b94c910c9dec..817c66b36286c5 100644
--- a/eng/pipelines/libraries/helix-queues-setup.yml
+++ b/eng/pipelines/libraries/helix-queues-setup.yml
@@ -34,7 +34,7 @@ jobs:
# Linux arm64
- ${{ if eq(parameters.platform, 'linux_arm64') }}:
- - (Ubuntu.2410.Arm64.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.10-helix-arm64v8
+ - (Ubuntu.2504.Arm64.Open)Ubuntu.2204.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-25.04-helix-arm64v8
- ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
- (Debian.13.Arm64.Open)Ubuntu.2204.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-arm64v8
@@ -56,7 +56,7 @@ jobs:
- SLES.15.Amd64.Open
- (Centos.10.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream-10-helix-amd64
- (Fedora.41.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:fedora-41-helix
- - (Ubuntu.2404.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-24.04-helix-amd64
+ - (Ubuntu.2504.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-25.04-helix-amd64
- (Debian.13.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-13-helix-amd64
- ${{ if or(ne(parameters.jobParameters.testScope, 'outerloop'), ne(parameters.jobParameters.runtimeFlavor, 'mono')) }}:
- ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
@@ -115,7 +115,7 @@ jobs:
# windows x64
- ${{ if eq(parameters.platform, 'windows_x64') }}:
# netcoreapp
- - ${{ if notIn(parameters.jobParameters.framework, 'net48') }}:
+ - ${{ if notIn(parameters.jobParameters.framework, 'net481') }}:
# libraries on mono outerloop
- ${{ if and(eq(parameters.jobParameters.testScope, 'outerloop'), eq(parameters.jobParameters.runtimeFlavor, 'mono')) }}:
- Windows.Amd64.Server2022.Open
@@ -137,14 +137,14 @@ jobs:
- (Windows.Nano.1809.Amd64.Open)windows.10.amd64.serverrs5.open@mcr.microsoft.com/dotnet-buildtools/prereqs:nanoserver-1809-helix-amd64
# .NETFramework
- - ${{ if eq(parameters.jobParameters.framework, 'net48') }}:
+ - ${{ if eq(parameters.jobParameters.framework, 'net481') }}:
- Windows.11.Amd64.Client.Open
# windows x86
- ${{ if eq(parameters.platform, 'windows_x86') }}:
# netcoreapp
- - ${{ if notIn(parameters.jobParameters.framework, 'net48') }}:
+ - ${{ if notIn(parameters.jobParameters.framework, 'net481') }}:
# mono outerloop
- ${{ if and(eq(parameters.jobParameters.testScope, 'outerloop'), eq(parameters.jobParameters.runtimeFlavor, 'mono')) }}:
- Windows.11.Amd64.Client.Open
@@ -155,7 +155,7 @@ jobs:
- Windows.11.Amd64.Client.Open
# .NETFramework
- - ${{ if eq(parameters.jobParameters.framework, 'net48') }}:
+ - ${{ if eq(parameters.jobParameters.framework, 'net481') }}:
- Windows.10.Amd64.Client.Open
# windows arm64
diff --git a/eng/pipelines/libraries/outerloop.yml b/eng/pipelines/libraries/outerloop.yml
index 597f298c37a3e0..bbb1adc12cde55 100644
--- a/eng/pipelines/libraries/outerloop.yml
+++ b/eng/pipelines/libraries/outerloop.yml
@@ -92,10 +92,10 @@ extends:
- ${{ if eq(variables['isRollingBuild'], true) }}:
- windows_x64
jobParameters:
- framework: net48
+ framework: net481
testScope: outerloop
- nameSuffix: NET48
- buildArgs: -s libs+libs.tests -c $(_BuildConfig) -testscope outerloop /p:ArchiveTests=true -f net48
+ nameSuffix: NET481
+ buildArgs: -s libs+libs.tests -c $(_BuildConfig) -testscope outerloop /p:ArchiveTests=true -f net481
timeoutInMinutes: 180
includeAllPlatforms: ${{ variables['isRollingBuild'] }}
# extra steps, run tests
@@ -104,4 +104,4 @@ extends:
parameters:
testScope: outerloop
creator: dotnet-bot
- extraHelixArguments: /p:BuildTargetFramework=net48
+ extraHelixArguments: /p:BuildTargetFramework=net481
diff --git a/eng/pipelines/libraries/stress/http.yml b/eng/pipelines/libraries/stress/http.yml
index 257334fce3e99c..e083bbac2a6e43 100644
--- a/eng/pipelines/libraries/stress/http.yml
+++ b/eng/pipelines/libraries/stress/http.yml
@@ -8,11 +8,11 @@ pr:
schedules:
- cron: "0 13 * * *" # 1PM UTC => 5 AM PST
displayName: HttpStress nightly run
+ always: true
branches:
include:
- main
- - release/8.0
- - release/9.0
+ - release/*-staging
variables:
- template: ../variables.yml
diff --git a/eng/pipelines/libraries/stress/ssl.yml b/eng/pipelines/libraries/stress/ssl.yml
index 360e67a86c98d4..eb2088242dcd2d 100644
--- a/eng/pipelines/libraries/stress/ssl.yml
+++ b/eng/pipelines/libraries/stress/ssl.yml
@@ -8,11 +8,11 @@ pr:
schedules:
- cron: "0 13 * * *" # 1PM UTC => 5 AM PST
displayName: SslStress nightly run
+ always: true
branches:
include:
- main
- - release/8.0
- - release/9.0
+ - release/*-staging
variables:
- template: ../variables.yml
diff --git a/eng/pipelines/performance/perf-build.yml b/eng/pipelines/performance/perf-build.yml
new file mode 100644
index 00000000000000..de9a7674c15937
--- /dev/null
+++ b/eng/pipelines/performance/perf-build.yml
@@ -0,0 +1,236 @@
+parameters:
+- name: runPrivateJobs
+ displayName: Upload artifacts to blob storage
+ type: boolean
+ default: false
+- name: mauiFramework
+ type: string
+ default: 'net9.0'
+- name: coreclr_arm64_linux
+ displayName: Build Coreclr Arm64 Linux
+ type: boolean
+ default: true
+- name: coreclr_arm64_windows
+ displayName: Build Coreclr Arm64 Windows
+ type: boolean
+ default: true
+- name: coreclr_muslx64_linux
+ displayName: Build Coreclr Musl x64 Linux
+ type: boolean
+ default: true
+- name: coreclr_x64_linux
+ displayName: Build Coreclr x64 Linux
+ type: boolean
+ default: true
+- name: coreclr_x64_windows
+ displayName: Build Coreclr x64 Windows
+ type: boolean
+ default: true
+- name: coreclr_x86_windows
+ displayName: Build Coreclr x86 Windows
+ type: boolean
+ default: true
+- name: coreclr_arm64_android
+ displayName: Build Coreclr Arm64 Android
+ type: boolean
+ default: true
+- name: wasm
+ displayName: Build WebAssembly (wasm)
+ type: boolean
+ default: true
+- name: monoAot_arm64_linux
+ displayName: Build Mono AOT Arm64 Linux
+ type: boolean
+ default: true
+- name: monoAot_x64_linux
+ displayName: Build Mono AOT x64 Linux
+ type: boolean
+ default: true
+- name: mono_x64_linux
+ displayName: Build Mono x64 Linux
+ type: boolean
+ default: true
+- name: mono_arm64_linux
+ displayName: Build Mono Arm64 Linux
+ type: boolean
+ default: true
+- name: mono_arm64_android
+ displayName: Build Mono Arm64 Android
+ type: boolean
+ default: true
+- name: mono_arm64_ios
+ displayName: Build Mono Arm64 iOS
+ type: boolean
+ default: true
+- name: monoBDN_arm64_android
+ displayName: Build Mono Arm64 Android BDN (Not working)
+ type: boolean
+ default: false # currently not working
+- name: nativeAot_arm64_ios
+ displayName: Build native AOT Arm64 iOS
+ type: boolean
+ default: true
+
+trigger:
+ batch: false # we want to build every single commit
+ branches:
+ include:
+ - main
+ - release/9.0
+ - release/8.0
+ paths:
+ include:
+ - '*'
+ exclude:
+ - '**.md'
+ - .devcontainer/*
+ - .github/*
+ - docs/*
+ - LICENSE.TXT
+ - PATENTS.TXT
+ - THIRD-PARTY-NOTICES.TXT
+
+resources:
+ repositories:
+ - repository: performance
+ type: git
+ name: internal/dotnet-performance
+
+variables:
+ - template: /eng/pipelines/common/variables.yml
+
+extends:
+ template: /eng/pipelines/common/templates/pipeline-with-resources.yml
+ parameters:
+ stages:
+ - ${{ if and(ne(variables['System.TeamProject'], 'public'), or(eq(variables['Build.Reason'], 'IndividualCI'), parameters.runPrivateJobs)) }}:
+ - stage: RegisterBuild
+ displayName: 'Register Build'
+ jobs:
+ - template: /eng/pipelines/register-build-jobs.yml@performance
+ parameters:
+ runtimeRepoAlias: self
+ performanceRepoAlias: performance
+ buildType:
+ - ${{ if eq(parameters.coreclr_arm64_linux, true) }}:
+ - coreclr_arm64_linux
+ - ${{ if eq(parameters.coreclr_arm64_windows, true) }}:
+ - coreclr_arm64_windows
+ - ${{ if eq(parameters.coreclr_muslx64_linux, true) }}:
+ - coreclr_muslx64_linux
+ - ${{ if eq(parameters.coreclr_x64_linux, true) }}:
+ - coreclr_x64_linux
+ - ${{ if eq(parameters.coreclr_x64_windows, true) }}:
+ - coreclr_x64_windows
+ - ${{ if eq(parameters.coreclr_x86_windows, true) }}:
+ - coreclr_x86_windows
+ - ${{ if eq(parameters.coreclr_arm64_android, true) }}:
+ - coreclr_arm64_android
+ - ${{ if eq(parameters.wasm, true) }}:
+ - wasm
+ - ${{ if eq(parameters.monoAot_arm64_linux, true) }}:
+ - monoAot_arm64_linux
+ - ${{ if eq(parameters.monoAot_x64_linux, true) }}:
+ - monoAot_x64_linux
+ - ${{ if eq(parameters.mono_x64_linux, true) }}:
+ - mono_x64_linux
+ - ${{ if eq(parameters.mono_arm64_linux, true) }}:
+ - mono_arm64_linux
+ - ${{ if eq(parameters.mono_arm64_android, true) }}:
+ - mono_arm64_android
+ - ${{ if eq(parameters.mono_arm64_ios, true) }}:
+ - mono_arm64_ios
+ - ${{ if eq(parameters.monoBDN_arm64_android, true) }}:
+ - monoBDN_arm64_android
+ - ${{ if eq(parameters.nativeAot_arm64_ios, true) }}:
+ - nativeAot_arm64_ios
+
+ - stage: Build
+ displayName: 'Build'
+ dependsOn: [] # so it runs in parallel with the RegisterBuild stage
+ jobs:
+ - template: /eng/pipelines/runtime-perf-build-jobs.yml@performance
+ parameters:
+ runtimeRepoAlias: self
+ performanceRepoAlias: performance
+ buildType:
+ - ${{ if eq(parameters.coreclr_arm64_linux, true) }}:
+ - coreclr_arm64_linux
+ - ${{ if eq(parameters.coreclr_arm64_windows, true) }}:
+ - coreclr_arm64_windows
+ - ${{ if eq(parameters.coreclr_muslx64_linux, true) }}:
+ - coreclr_muslx64_linux
+ - ${{ if eq(parameters.coreclr_x64_linux, true) }}:
+ - coreclr_x64_linux
+ - ${{ if eq(parameters.coreclr_x64_windows, true) }}:
+ - coreclr_x64_windows
+ - ${{ if eq(parameters.coreclr_x86_windows, true) }}:
+ - coreclr_x86_windows
+ - ${{ if eq(parameters.coreclr_arm64_android, true) }}:
+ - coreclr_arm64_android
+ - ${{ if eq(parameters.wasm, true) }}:
+ - wasm
+ - ${{ if eq(parameters.monoAot_arm64_linux, true) }}:
+ - monoAot_arm64_linux
+ - ${{ if eq(parameters.monoAot_x64_linux, true) }}:
+ - monoAot_x64_linux
+ - ${{ if eq(parameters.mono_x64_linux, true) }}:
+ - mono_x64_linux
+ - ${{ if eq(parameters.mono_arm64_linux, true) }}:
+ - mono_arm64_linux
+ - ${{ if eq(parameters.mono_arm64_android, true) }}:
+ - mono_arm64_android
+ - ${{ if eq(parameters.mono_arm64_ios, true) }}:
+ - mono_arm64_ios
+ - ${{ if eq(parameters.monoBDN_arm64_android, true) }}:
+ - monoBDN_arm64_android
+ - ${{ if eq(parameters.nativeAot_arm64_ios, true) }}:
+ - nativeAot_arm64_ios
+ ${{ if parameters.mauiFramework }}:
+ mauiFramework: ${{ parameters.mauiFramework }}
+
+ - ${{ if and(ne(variables['System.TeamProject'], 'public'), or(eq(variables['Build.Reason'], 'IndividualCI'), parameters.runPrivateJobs)) }}:
+ - stage: UploadArtifacts
+ displayName: 'Upload Artifacts'
+ condition: always()
+ dependsOn:
+ - Build
+ - RegisterBuild
+ jobs:
+ - template: /eng/pipelines/upload-build-artifacts-jobs.yml@performance
+ parameters:
+ runtimeRepoAlias: self
+ performanceRepoAlias: performance
+ buildType:
+ - ${{ if eq(parameters.coreclr_arm64_linux, true) }}:
+ - coreclr_arm64_linux
+ - ${{ if eq(parameters.coreclr_arm64_windows, true) }}:
+ - coreclr_arm64_windows
+ - ${{ if eq(parameters.coreclr_muslx64_linux, true) }}:
+ - coreclr_muslx64_linux
+ - ${{ if eq(parameters.coreclr_x64_linux, true) }}:
+ - coreclr_x64_linux
+ - ${{ if eq(parameters.coreclr_x64_windows, true) }}:
+ - coreclr_x64_windows
+ - ${{ if eq(parameters.coreclr_x86_windows, true) }}:
+ - coreclr_x86_windows
+ - ${{ if eq(parameters.coreclr_arm64_android, true) }}:
+ - coreclr_arm64_android
+ - ${{ if eq(parameters.wasm, true) }}:
+ - wasm
+ - ${{ if eq(parameters.monoAot_arm64_linux, true) }}:
+ - monoAot_arm64_linux
+ - ${{ if eq(parameters.monoAot_x64_linux, true) }}:
+ - monoAot_x64_linux
+ - ${{ if eq(parameters.mono_x64_linux, true) }}:
+ - mono_x64_linux
+ - ${{ if eq(parameters.mono_arm64_linux, true) }}:
+ - mono_arm64_linux
+ - ${{ if eq(parameters.mono_arm64_android, true) }}:
+ - mono_arm64_android
+ - ${{ if eq(parameters.mono_arm64_ios, true) }}:
+ - mono_arm64_ios
+ - ${{ if eq(parameters.monoBDN_arm64_android, true) }}:
+ - monoBDN_arm64_android
+ - ${{ if eq(parameters.nativeAot_arm64_ios, true) }}:
+ - nativeAot_arm64_ios
diff --git a/eng/pipelines/performance/perf-slow.yml b/eng/pipelines/performance/perf-slow.yml
index d30ecea46d79a5..84ca1836ff2321 100644
--- a/eng/pipelines/performance/perf-slow.yml
+++ b/eng/pipelines/performance/perf-slow.yml
@@ -15,10 +15,10 @@ trigger:
include:
- main
- release/9.0
+ - release/8.0
paths:
include:
- '*'
- - src/libraries/System.Private.CoreLib/*
exclude:
- '**.md'
- .devcontainer/*
@@ -62,4 +62,4 @@ extends:
performanceRepoAlias: performance
jobParameters:
${{ if parameters.onlySanityCheck }}:
- onlySanityCheck: true
\ No newline at end of file
+ onlySanityCheck: true
diff --git a/eng/pipelines/performance/perf.yml b/eng/pipelines/performance/perf.yml
index e717fbe4915927..01b80b580db2bd 100644
--- a/eng/pipelines/performance/perf.yml
+++ b/eng/pipelines/performance/perf.yml
@@ -13,7 +13,6 @@ trigger:
paths:
include:
- '*'
- - src/libraries/System.Private.CoreLib/*
exclude:
- '**.md'
- .devcontainer/*
diff --git a/eng/pipelines/performance/templates/perf-bdn-build-jobs.yml b/eng/pipelines/performance/templates/perf-bdn-build-jobs.yml
index 3355ceeedb9d2b..c6ffc983f8c175 100644
--- a/eng/pipelines/performance/templates/perf-bdn-build-jobs.yml
+++ b/eng/pipelines/performance/templates/perf-bdn-build-jobs.yml
@@ -18,7 +18,7 @@ jobs:
- ios_arm64
jobParameters:
dependsOn:
- - Build_android_arm64_release_Mono_Packs
+ - build_android_arm64_release_Mono_Packs
buildArgs: -s mono -c $(_BuildConfig)
nameSuffix: PerfBDNApp
isOfficialBuild: false
diff --git a/eng/pipelines/performance/templates/perf-build-jobs.yml b/eng/pipelines/performance/templates/perf-build-jobs.yml
index 4a77a9796eaa75..4defede2528db2 100644
--- a/eng/pipelines/performance/templates/perf-build-jobs.yml
+++ b/eng/pipelines/performance/templates/perf-build-jobs.yml
@@ -9,7 +9,7 @@ jobs:
windows_x64: true
windows_x86: true
linux_musl_x64: true
- coreclrAndroid: true
+ android_arm64: true
# build mono for AOT
- template: /eng/pipelines/performance/templates/perf-mono-build-jobs.yml
diff --git a/eng/pipelines/performance/templates/perf-coreclr-build-jobs.yml b/eng/pipelines/performance/templates/perf-coreclr-build-jobs.yml
index 96792aab986a07..ddf103aaf4c0d9 100644
--- a/eng/pipelines/performance/templates/perf-coreclr-build-jobs.yml
+++ b/eng/pipelines/performance/templates/perf-coreclr-build-jobs.yml
@@ -1,11 +1,11 @@
parameters:
linux_x64: false
linux_musl_x64: false
+ linux_arm64: false
windows_x64: false
windows_x86: false
- linux_arm64: false
windows_arm64: false
- coreclrAndroid: false
+ android_arm64: false
jobs:
- ${{ if or(eq(parameters.linux_x64, true), eq(parameters.windows_x64, true), eq(parameters.windows_x86, true), eq(parameters.linux_musl_x64, true), eq(parameters.linux_arm64, true), eq(parameters.windows_arm64, true)) }}:
@@ -42,7 +42,7 @@ jobs:
artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr
displayName: Build Assets
- - ${{ if eq(parameters.coreclrAndroid, true) }}:
+ - ${{ if eq(parameters.android_arm64, true) }}:
# build CoreCLR Android scenarios
- template: /eng/pipelines/common/platform-matrix.yml
parameters:
diff --git a/eng/pipelines/performance/templates/perf-ios-scenarios-build-jobs.yml b/eng/pipelines/performance/templates/perf-ios-scenarios-build-jobs.yml
index 10b2d79a8d03bc..85cff56fdc4c62 100644
--- a/eng/pipelines/performance/templates/perf-ios-scenarios-build-jobs.yml
+++ b/eng/pipelines/performance/templates/perf-ios-scenarios-build-jobs.yml
@@ -1,7 +1,7 @@
parameters:
hybridGlobalization: true
- mono: true
- nativeAot: true
+ mono: false
+ nativeAot: false
jobs:
- ${{ if eq(parameters.mono, true) }}:
diff --git a/eng/pipelines/runtime.yml b/eng/pipelines/runtime.yml
index fe9c980fbb9354..3dca7fedc2467b 100644
--- a/eng/pipelines/runtime.yml
+++ b/eng/pipelines/runtime.yml
@@ -125,7 +125,7 @@ extends:
- browser_wasm
jobParameters:
nameSuffix: AllSubsets_CoreCLR
- buildArgs: -s mono.emsdk+clr.paltests -rc Release -c Release -lc $(_BuildConfig)
+ buildArgs: -s mono.emsdk+clr.runtime -rc Release -c Release -lc $(_BuildConfig)
timeoutInMinutes: 120
condition: >-
or(
@@ -1234,16 +1234,16 @@ extends:
- windows_x86
helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
jobParameters:
- framework: net48
- buildArgs: -s tools+libs+libs.tests -framework net48 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true
- nameSuffix: Libraries_NET48
+ framework: net481
+ buildArgs: -s tools+libs+libs.tests -framework net481 -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true
+ nameSuffix: Libraries_NET481
timeoutInMinutes: 150
postBuildSteps:
- template: /eng/pipelines/libraries/helix.yml
parameters:
creator: dotnet-bot
- testRunNamePrefixSuffix: NET48_$(_BuildConfig)
- extraHelixArguments: /p:BuildTargetFramework=net48
+ testRunNamePrefixSuffix: NET481_$(_BuildConfig)
+ extraHelixArguments: /p:BuildTargetFramework=net481
condition: >-
or(
eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
diff --git a/eng/testing/BrowserVersions.props b/eng/testing/BrowserVersions.props
index 3f9754af2065f0..1dda520b8e1b1e 100644
--- a/eng/testing/BrowserVersions.props
+++ b/eng/testing/BrowserVersions.props
@@ -1,10 +1,10 @@
- 134.0.6998.35
+ 134.0.6998.88
1415337
https://storage.googleapis.com/chromium-browser-snapshots/Linux_x64/1415339
13.4.115
- 134.0.6998.35
+ 134.0.6998.89
1415337
https://storage.googleapis.com/chromium-browser-snapshots/Win_x64/1415350
13.4.115
diff --git a/eng/testing/tests.wasm.targets b/eng/testing/tests.wasm.targets
index 65ab8ed35de7f8..0fc4926a782a41 100644
--- a/eng/testing/tests.wasm.targets
+++ b/eng/testing/tests.wasm.targets
@@ -87,7 +87,9 @@
-
+
+
+
public sealed partial class Mutex : WaitHandle
{
- private void CreateMutexCore(bool initiallyOwned, string? name, out bool createdNew)
+ private unsafe void CreateMutexCore(bool initiallyOwned)
{
- SafeWaitHandle mutexHandle = CreateMutexCore(initiallyOwned, name, out int errorCode, out string? errorDetails);
+ SafeWaitHandle handle =
+ CreateMutex(
+ initiallyOwned,
+ name: null,
+ currentUserOnly: false,
+ systemCallErrors: null,
+ systemCallErrorsBufferSize: 0);
+ if (handle.IsInvalid)
+ {
+ int errorCode = Marshal.GetLastPInvokeError();
+ handle.SetHandleAsInvalid();
+ throw Win32Marshal.GetExceptionForWin32Error(errorCode);
+ }
+
+ SafeWaitHandle = handle;
+ }
+
+ private void CreateMutexCore(
+ bool initiallyOwned,
+ string? name,
+ NamedWaitHandleOptionsInternal options,
+ out bool createdNew)
+ {
+ bool currentUserOnly = false;
+ if (!string.IsNullOrEmpty(name) && options.WasSpecified)
+ {
+ name = options.GetNameWithSessionPrefix(name);
+ currentUserOnly = options.CurrentUserOnly;
+ }
+
+ SafeWaitHandle mutexHandle =
+ CreateMutexCore(initiallyOwned, name, currentUserOnly, out int errorCode, out string? errorDetails);
if (mutexHandle.IsInvalid)
{
mutexHandle.SetHandleAsInvalid();
@@ -33,16 +64,26 @@ private void CreateMutexCore(bool initiallyOwned, string? name, out bool created
SafeWaitHandle = mutexHandle;
}
- private static OpenExistingResult OpenExistingWorker(string name, out Mutex? result)
+ private static OpenExistingResult OpenExistingWorker(
+ string name,
+ NamedWaitHandleOptionsInternal options,
+ out Mutex? result)
{
ArgumentException.ThrowIfNullOrEmpty(name);
+ bool currentUserOnly = false;
+ if (options.WasSpecified)
+ {
+ name = options.GetNameWithSessionPrefix(name);
+ currentUserOnly = options.CurrentUserOnly;
+ }
+
result = null;
// To allow users to view & edit the ACL's, call OpenMutex
// with parameters to allow us to view & edit the ACL. This will
// fail if we don't have permission to view or edit the ACL's.
// If that happens, ask for less permissions.
- SafeWaitHandle myHandle = OpenMutexCore(name, out int errorCode, out string? errorDetails);
+ SafeWaitHandle myHandle = OpenMutexCore(name, currentUserOnly, out int errorCode, out string? errorDetails);
if (myHandle.IsInvalid)
{
@@ -86,11 +127,13 @@ public void ReleaseMutex()
private static unsafe SafeWaitHandle CreateMutexCore(
bool initialOwner,
string? name,
+ bool currentUserOnly,
out int errorCode,
out string? errorDetails)
{
byte* systemCallErrors = stackalloc byte[SystemCallErrorsBufferSize];
- SafeWaitHandle mutexHandle = CreateMutex(initialOwner, name, systemCallErrors, SystemCallErrorsBufferSize);
+ SafeWaitHandle mutexHandle =
+ CreateMutex(initialOwner, name, currentUserOnly, systemCallErrors, SystemCallErrorsBufferSize);
// Get the error code even if the handle is valid, as it could be ERROR_ALREADY_EXISTS, indicating that the mutex
// already exists and was opened
@@ -100,10 +143,10 @@ private static unsafe SafeWaitHandle CreateMutexCore(
return mutexHandle;
}
- private static unsafe SafeWaitHandle OpenMutexCore(string name, out int errorCode, out string? errorDetails)
+ private static unsafe SafeWaitHandle OpenMutexCore(string name, bool currentUserOnly, out int errorCode, out string? errorDetails)
{
byte* systemCallErrors = stackalloc byte[SystemCallErrorsBufferSize];
- SafeWaitHandle mutexHandle = OpenMutex(name, systemCallErrors, SystemCallErrorsBufferSize);
+ SafeWaitHandle mutexHandle = OpenMutex(name, currentUserOnly, systemCallErrors, SystemCallErrorsBufferSize);
errorCode = mutexHandle.IsInvalid ? Marshal.GetLastPInvokeError() : Interop.Errors.ERROR_SUCCESS;
errorDetails = mutexHandle.IsInvalid ? GetErrorDetails(systemCallErrors) : null;
return mutexHandle;
@@ -127,9 +170,9 @@ private static unsafe SafeWaitHandle OpenMutexCore(string name, out int errorCod
}
[LibraryImport(RuntimeHelpers.QCall, EntryPoint = "PAL_CreateMutexW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
- private static unsafe partial SafeWaitHandle CreateMutex([MarshalAs(UnmanagedType.Bool)] bool initialOwner, string? name, byte* systemCallErrors, uint systemCallErrorsBufferSize);
+ private static unsafe partial SafeWaitHandle CreateMutex([MarshalAs(UnmanagedType.Bool)] bool initialOwner, string? name, [MarshalAs(UnmanagedType.Bool)] bool currentUserOnly, byte* systemCallErrors, uint systemCallErrorsBufferSize);
[LibraryImport(RuntimeHelpers.QCall, EntryPoint = "PAL_OpenMutexW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
- private static unsafe partial SafeWaitHandle OpenMutex(string name, byte* systemCallErrors, uint systemCallErrorsBufferSize);
+ private static unsafe partial SafeWaitHandle OpenMutex(string name, [MarshalAs(UnmanagedType.Bool)] bool currentUserOnly, byte* systemCallErrors, uint systemCallErrorsBufferSize);
}
}
diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake
index d5b2d29d2e5d5a..550ce70a14ef7a 100644
--- a/src/coreclr/clrdefinitions.cmake
+++ b/src/coreclr/clrdefinitions.cmake
@@ -58,7 +58,7 @@ if(CLR_CMAKE_HOST_WIN32)
add_compile_definitions(NOMINMAX)
endif(CLR_CMAKE_HOST_WIN32)
-if (NOT (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX))
+if (NOT ((CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX) OR CLR_CMAKE_TARGET_ARCH_WASM))
add_compile_definitions(FEATURE_METADATA_UPDATER)
endif()
if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_WIN32))
@@ -119,9 +119,9 @@ if(CLR_CMAKE_TARGET_LINUX)
add_definitions(-DFEATURE_EVENTSOURCE_XPLAT)
endif(CLR_CMAKE_TARGET_LINUX)
# NetBSD doesn't implement this feature
-if(NOT CLR_CMAKE_TARGET_NETBSD)
+if(NOT CLR_CMAKE_TARGET_NETBSD AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
add_definitions(-DFEATURE_HIJACK)
-endif(NOT CLR_CMAKE_TARGET_NETBSD)
+endif(NOT CLR_CMAKE_TARGET_NETBSD AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
if (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64))
add_definitions(-DFEATURE_INTEROP_DEBUGGING)
endif (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64))
@@ -188,9 +188,9 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64)
add_definitions(-DUNIX_AMD64_ABI_ITF)
endif (CLR_CMAKE_TARGET_ARCH_AMD64)
add_definitions(-DFEATURE_USE_ASM_GC_WRITE_BARRIERS)
-if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
+if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64 OR CLR_CMAKE_TARGET_ARCH_ARM)
add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP)
-endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
+endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64 OR CLR_CMAKE_TARGET_ARCH_ARM)
if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES)
endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
@@ -213,6 +213,9 @@ if (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_
add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC)
endif()
+if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
+ add_definitions(-DFEATURE_STUBPRECODE_DYNAMIC_HELPERS)
+endif()
# Use this function to enable building with a specific target OS and architecture set of defines
# This is known to work for the set of defines used by the JIT and gcinfo, it is not likely correct for
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake
index d6a8965843e585..a51d5e9eca1791 100644
--- a/src/coreclr/clrfeatures.cmake
+++ b/src/coreclr/clrfeatures.cmake
@@ -52,7 +52,7 @@ if (CLR_CMAKE_TARGET_WIN32)
endif(CLR_CMAKE_TARGET_WIN32)
-if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS)
+if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS OR CLR_CMAKE_TARGET_ARCH_WASM)
set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1)
set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 0)
else()
@@ -70,3 +70,7 @@ if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64)
# Allow 16 byte compare-exchange (cmpxchg16b)
add_compile_options($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:-mcx16>)
endif()
+
+if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
+ set(FEATURE_STUBPRECODE_DYNAMIC_HELPERS 1)
+endif()
diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp
index d43689d6dba9b4..06b00e4ffa75f2 100644
--- a/src/coreclr/debug/daccess/request.cpp
+++ b/src/coreclr/debug/daccess/request.cpp
@@ -3674,6 +3674,9 @@ static const char *LoaderAllocatorLoaderHeapNames[] =
"ExecutableHeap",
"FixupPrecodeHeap",
"NewStubPrecodeHeap",
+#if defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
+ "DynamicHelpersStubHeap",
+#endif // defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
"IndcellHeap",
#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
"CacheEntryHeap",
@@ -3711,7 +3714,9 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetExecutableHeap());
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetFixupPrecodeHeap());
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetNewStubPrecodeHeap());
-
+#if defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
+ pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetDynamicHelpersStubHeap());
+#endif // defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
VirtualCallStubManager *pVcsMgr = pLoaderAllocator->GetVirtualCallStubManager();
if (pVcsMgr == nullptr)
{
diff --git a/src/coreclr/debug/inc/dbgipcevents.h b/src/coreclr/debug/inc/dbgipcevents.h
index 6c39939f00307e..9ff25d86edf185 100644
--- a/src/coreclr/debug/inc/dbgipcevents.h
+++ b/src/coreclr/debug/inc/dbgipcevents.h
@@ -1938,6 +1938,9 @@ C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP);
C_ASSERT(DBG_TARGET_REGNUM_SP == ICorDebugInfo::REGNUM_SP);
C_ASSERT(DBG_TARGET_REGNUM_AMBIENT_SP == ICorDebugInfo::REGNUM_AMBIENT_SP);
#endif
+#elif defined(TARGET_WASM)
+#define DBG_TARGET_REGNUM_SP 0
+#define DBG_TARGET_REGNUM_AMBIENT_SP 0
#else
#error Target registers are not defined for this platform
#endif
diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h
index dab7ca29c7db33..ea374cf8b6def6 100644
--- a/src/coreclr/debug/inc/dbgtargetcontext.h
+++ b/src/coreclr/debug/inc/dbgtargetcontext.h
@@ -58,6 +58,8 @@
#define DTCONTEXT_IS_LOONGARCH64
#elif defined (TARGET_RISCV64)
#define DTCONTEXT_IS_RISCV64
+#elif defined (TARGET_WASM)
+#define DTCONTEXT_IS_WASM
#endif
#define CONTEXT_AREA_MASK 0xffff
@@ -614,6 +616,10 @@ typedef struct DECLSPEC_ALIGN(16) {
static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size");
+#elif defined(DTCONTEXT_IS_WASM)
+// no context for wasm
+typedef struct {
+} DT_CONTEXT;
#else
#error Unsupported platform
#endif
diff --git a/src/coreclr/debug/inc/wasm/primitives.h b/src/coreclr/debug/inc/wasm/primitives.h
new file mode 100644
index 00000000000000..5c428fe76ceb62
--- /dev/null
+++ b/src/coreclr/debug/inc/wasm/primitives.h
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//*****************************************************************************
+// File: primitives.h
+//
+
+//
+// Platform-specific debugger primitives
+//
+//*****************************************************************************
+
+#ifndef PRIMITIVES_H_
+#define PRIMITIVES_H_
+
+inline CORDB_ADDRESS GetPatchEndAddr(CORDB_ADDRESS patchAddr)
+{
+ _ASSERTE(!"The function is not implemented on wasm");
+ return patchAddr;
+}
+
+typedef const BYTE CORDB_ADDRESS_TYPE;
+typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE;
+
+// This is an abstraction to keep x86/ia64 patch data separate
+#define PRD_TYPE USHORT
+
+#define MAX_INSTRUCTION_LENGTH 2 // update once we have codegen
+
+#define CORDbg_BREAK_INSTRUCTION_SIZE 1
+#define CORDbg_BREAK_INSTRUCTION 0 // unreachable instruction
+
+inline bool PRDIsEmpty(PRD_TYPE p1)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ return p1 == 0;
+}
+
+#endif
diff --git a/src/coreclr/debug/runtimeinfo/contracts.jsonc b/src/coreclr/debug/runtimeinfo/contracts.jsonc
index 8d0ecf9f679ff2..dc20297b01bed1 100644
--- a/src/coreclr/debug/runtimeinfo/contracts.jsonc
+++ b/src/coreclr/debug/runtimeinfo/contracts.jsonc
@@ -17,7 +17,7 @@
"Loader": 1,
"Object": 1,
"PlatformMetadata": 1,
- "PrecodeStubs": 1,
+ "PrecodeStubs": 2,
"ReJIT": 1,
"RuntimeTypeSystem": 1,
"StackWalk": 1,
diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.cpp b/src/coreclr/debug/runtimeinfo/datadescriptor.cpp
index 72a1ee18304578..c4d0aa3d42b645 100644
--- a/src/coreclr/debug/runtimeinfo/datadescriptor.cpp
+++ b/src/coreclr/debug/runtimeinfo/datadescriptor.cpp
@@ -13,6 +13,8 @@
#include "methodtable.h"
#include "threads.h"
+#include "../debug/ee/debugger.h"
+
#ifdef HAVE_GCCOVER
#include "gccover.h"
#endif // HAVE_GCCOVER
diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.h b/src/coreclr/debug/runtimeinfo/datadescriptor.h
index dc6e45baf9bf8c..b23b5ea39eb00d 100644
--- a/src/coreclr/debug/runtimeinfo/datadescriptor.h
+++ b/src/coreclr/debug/runtimeinfo/datadescriptor.h
@@ -492,10 +492,17 @@ CDAC_TYPE_END(PlatformMetadata)
CDAC_TYPE_BEGIN(StubPrecodeData)
CDAC_TYPE_INDETERMINATE(StubPrecodeData)
-CDAC_TYPE_FIELD(StubPrecodeData, /*pointer*/, MethodDesc, offsetof(StubPrecodeData, SecretParam))
+CDAC_TYPE_FIELD(StubPrecodeData, /*pointer*/, SecretParam, offsetof(StubPrecodeData, SecretParam))
CDAC_TYPE_FIELD(StubPrecodeData, /*uint8*/, Type, offsetof(StubPrecodeData, Type))
CDAC_TYPE_END(StubPrecodeData)
+#ifdef HAS_THISPTR_RETBUF_PRECODE
+CDAC_TYPE_BEGIN(ThisPtrRetBufPrecodeData)
+CDAC_TYPE_INDETERMINATE(ThisPtrRetBufPrecodeData)
+CDAC_TYPE_FIELD(ThisPtrRetBufPrecodeData, /*pointer*/, MethodDesc, offsetof(ThisPtrRetBufPrecodeData, MethodDesc))
+CDAC_TYPE_END(ThisPtrRetBufPrecodeData)
+#endif
+
CDAC_TYPE_BEGIN(FixupPrecodeData)
CDAC_TYPE_INDETERMINATE(FixupPrecodeData)
CDAC_TYPE_FIELD(FixupPrecodeData, /*pointer*/, MethodDesc, offsetof(FixupPrecodeData, MethodDesc))
@@ -645,6 +652,108 @@ CDAC_TYPE_FIELD(SoftwareExceptionFrame, /*pointer*/, ReturnAddress, cdac_data::TransitionBlockPtr)
+CDAC_TYPE_END(FramedMethodFrame)
+
+CDAC_TYPE_BEGIN(TransitionBlock)
+CDAC_TYPE_SIZE(sizeof(TransitionBlock))
+CDAC_TYPE_FIELD(TransitionBlock, /*pointer*/, ReturnAddress, offsetof(TransitionBlock, m_ReturnAddress))
+CDAC_TYPE_FIELD(TransitionBlock, /*CalleeSavedRegisters*/, CalleeSavedRegisters, offsetof(TransitionBlock, m_calleeSavedRegisters))
+CDAC_TYPE_END(TransitionBlock)
+
+#ifdef DEBUGGING_SUPPORTED
+CDAC_TYPE_BEGIN(FuncEvalFrame)
+CDAC_TYPE_SIZE(sizeof(FuncEvalFrame))
+CDAC_TYPE_FIELD(FuncEvalFrame, /*pointer*/, DebuggerEvalPtr, cdac_data::DebuggerEvalPtr)
+CDAC_TYPE_END(FuncEvalFrame)
+
+CDAC_TYPE_BEGIN(DebuggerEval)
+CDAC_TYPE_SIZE(sizeof(DebuggerEval))
+CDAC_TYPE_FIELD(DebuggerEval, /*T_CONTEXT*/, TargetContext, offsetof(DebuggerEval, m_context))
+CDAC_TYPE_FIELD(DebuggerEval, /*bool*/, EvalDuringException, offsetof(DebuggerEval, m_evalDuringException))
+CDAC_TYPE_END(DebuggerEval)
+#endif // DEBUGGING_SUPPORTED
+
+#ifdef FEATURE_HIJACK
+CDAC_TYPE_BEGIN(ResumableFrame)
+CDAC_TYPE_SIZE(sizeof(ResumableFrame))
+CDAC_TYPE_FIELD(ResumableFrame, /*pointer*/, TargetContextPtr, cdac_data::TargetContextPtr)
+CDAC_TYPE_END(ResumableFrame)
+
+CDAC_TYPE_BEGIN(HijackFrame)
+CDAC_TYPE_SIZE(sizeof(HijackFrame))
+CDAC_TYPE_FIELD(HijackFrame, /*pointer*/, ReturnAddress, cdac_data::ReturnAddress)
+CDAC_TYPE_FIELD(HijackFrame, /*pointer*/, HijackArgsPtr, cdac_data::HijackArgsPtr)
+CDAC_TYPE_END(HijackFrame)
+
+// HijackArgs struct is different on each platform
+CDAC_TYPE_BEGIN(HijackArgs)
+CDAC_TYPE_SIZE(sizeof(HijackArgs))
+#if defined(TARGET_AMD64)
+
+CDAC_TYPE_FIELD(HijackArgs, /*CalleeSavedRegisters*/, CalleeSavedRegisters, offsetof(HijackArgs, Regs))
+#ifdef TARGET_WINDOWS
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, Rsp, offsetof(HijackArgs, Rsp))
+#endif // TARGET_WINDOWS
+
+#elif defined(TARGET_ARM64)
+
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X0, offsetof(HijackArgs, X0))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X1, offsetof(HijackArgs, X1))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X19, offsetof(HijackArgs, X19))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X20, offsetof(HijackArgs, X20))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X21, offsetof(HijackArgs, X21))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X22, offsetof(HijackArgs, X22))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X23, offsetof(HijackArgs, X23))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X24, offsetof(HijackArgs, X24))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X25, offsetof(HijackArgs, X25))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X26, offsetof(HijackArgs, X26))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X27, offsetof(HijackArgs, X27))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, X28, offsetof(HijackArgs, X28))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, Fp, offsetof(HijackArgs, X29))
+CDAC_TYPE_FIELD(HijackArgs, /*pointer*/, Lr, offsetof(HijackArgs, Lr))
+
+#endif // Platform switch
+CDAC_TYPE_END(HijackArgs)
+#endif // FEATURE_HIJACK
+
+CDAC_TYPE_BEGIN(FaultingExceptionFrame)
+CDAC_TYPE_SIZE(sizeof(FaultingExceptionFrame))
+#ifdef FEATURE_EH_FUNCLETS
+CDAC_TYPE_FIELD(FaultingExceptionFrame, /*T_CONTEXT*/, TargetContext, cdac_data::TargetContext)
+#endif // FEATURE_EH_FUNCLETS
+CDAC_TYPE_END(FaultingExceptionFrame)
+
+// CalleeSavedRegisters struct is different on each platform
+CDAC_TYPE_BEGIN(CalleeSavedRegisters)
+CDAC_TYPE_SIZE(sizeof(CalleeSavedRegisters))
+#if defined(TARGET_AMD64)
+
+#define CALLEE_SAVED_REGISTER(regname) \
+ CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, regname, offsetof(CalleeSavedRegisters, regname))
+ENUM_CALLEE_SAVED_REGISTERS()
+#undef CALLEE_SAVED_REGISTER
+
+#elif defined(TARGET_ARM64)
+
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X19, offsetof(CalleeSavedRegisters, x19))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X20, offsetof(CalleeSavedRegisters, x20))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X21, offsetof(CalleeSavedRegisters, x21))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X22, offsetof(CalleeSavedRegisters, x22))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X23, offsetof(CalleeSavedRegisters, x23))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X24, offsetof(CalleeSavedRegisters, x24))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X25, offsetof(CalleeSavedRegisters, x25))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X26, offsetof(CalleeSavedRegisters, x26))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X27, offsetof(CalleeSavedRegisters, x27))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, X28, offsetof(CalleeSavedRegisters, x28))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, Fp, offsetof(CalleeSavedRegisters, x29))
+CDAC_TYPE_FIELD(CalleeSavedRegisters, /*nuint*/, Lr, offsetof(CalleeSavedRegisters, x30))
+
+#endif // Platform switch
+CDAC_TYPE_END(CalleeSavedRegisters)
+
CDAC_TYPES_END()
CDAC_GLOBALS_BEGIN()
diff --git a/src/coreclr/dlls/CMakeLists.txt b/src/coreclr/dlls/CMakeLists.txt
index 9bd79f94d6fd91..0a2ab14d8db82f 100644
--- a/src/coreclr/dlls/CMakeLists.txt
+++ b/src/coreclr/dlls/CMakeLists.txt
@@ -2,9 +2,13 @@ if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE)
add_subdirectory(clretwrc)
endif(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE)
if (NOT (CLR_CMAKE_TARGET_WIN32 AND FEATURE_CROSSBITNESS))
- add_subdirectory(mscordbi)
- add_subdirectory(mscordac)
+ if (NOT CLR_CMAKE_TARGET_ARCH_WASM)
+ add_subdirectory(mscordbi)
+ add_subdirectory(mscordac)
+ endif()
add_subdirectory(mscoree)
endif()
-add_subdirectory(mscorpe)
+if (NOT CLR_CMAKE_TARGET_ARCH_WASM)
+ add_subdirectory(mscorpe)
+endif()
add_subdirectory(mscorrc)
diff --git a/src/coreclr/dlls/mscordac/mscordac_unixexports.src b/src/coreclr/dlls/mscordac/mscordac_unixexports.src
index 0857ba2884f78f..206ae4091b7c82 100644
--- a/src/coreclr/dlls/mscordac/mscordac_unixexports.src
+++ b/src/coreclr/dlls/mscordac/mscordac_unixexports.src
@@ -61,7 +61,6 @@ nativeStringResourceTable_mscorrc
#memcpy_s
#sscanf_s
-#CopyFileW
#CreateFileMappingW
#CreateFileA
#CreateFileW
@@ -101,7 +100,6 @@ nativeStringResourceTable_mscorrc
#GetSystemInfo
#GetSystemTime
#GetSystemTimeAsFileTime
-#GetTempFileNameW
#GetTempPathA
#GetTempPathW
#InitializeCriticalSection
diff --git a/src/coreclr/gc/env/gcenv.base.h b/src/coreclr/gc/env/gcenv.base.h
index 1603448ae2a4f8..94d88e46466940 100644
--- a/src/coreclr/gc/env/gcenv.base.h
+++ b/src/coreclr/gc/env/gcenv.base.h
@@ -143,7 +143,10 @@ typedef DWORD (WINAPI *PTHREAD_START_ROUTINE)(void* lpThreadParameter);
#pragma intrinsic(__dmb)
#define MemoryBarrier() { __dmb(_ARM64_BARRIER_SY); }
- #elif defined(HOST_AMD64)
+ #elif defined(HOST_BROWSER)
+ #define YieldProcessor()
+ #define MemoryBarrier __sync_synchronize
+#elif defined(HOST_AMD64)
extern "C" void
_mm_pause (
diff --git a/src/coreclr/gc/unix/gcenv.unix.cpp b/src/coreclr/gc/unix/gcenv.unix.cpp
index 43588c66eb015a..5a199f4b7db69e 100644
--- a/src/coreclr/gc/unix/gcenv.unix.cpp
+++ b/src/coreclr/gc/unix/gcenv.unix.cpp
@@ -35,6 +35,9 @@
#define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__)
#elif HAVE_SYS_MEMBARRIER_H
#include
+#ifdef TARGET_BROWSER
+#define membarrier(cmd, flags, cpu_id) 0 // browser/wasm is currently single threaded
+#endif
#endif
#include
diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp
index b512e92a6e3f28..ffac0f713ab77e 100644
--- a/src/coreclr/gcinfo/gcinfoencoder.cpp
+++ b/src/coreclr/gcinfo/gcinfoencoder.cpp
@@ -10,6 +10,9 @@
#include
#include "gcinfoencoder.h"
+
+using namespace GcInfoEncoderExt;
+
#include "targetosarch.h"
#ifdef _DEBUG
@@ -428,7 +431,7 @@ void GcInfoSize::Log(DWORD level, const char * header)
#endif
-GcInfoEncoder::GcInfoEncoder(
+template TGcInfoEncoder::TGcInfoEncoder(
ICorJitInfo* pCorJitInfo,
CORINFO_METHOD_INFO* pMethodInfo,
IAllocator* pJitAllocator,
@@ -498,7 +501,7 @@ GcInfoEncoder::GcInfoEncoder(
}
#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED
-void GcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites)
+template void TGcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites)
{
m_pCallSites = pCallSites;
m_pCallSiteSizes = pCallSiteSizes;
@@ -507,7 +510,7 @@ void GcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UI
for(UINT32 i=0; i 0);
- _ASSERTE(DENORMALIZE_CODE_OFFSET(NORMALIZE_CODE_OFFSET(pCallSites[i])) == pCallSites[i]);
+ _ASSERTE(GcInfoEncoding::DENORMALIZE_CODE_OFFSET(GcInfoEncoding::NORMALIZE_CODE_OFFSET(pCallSites[i])) == pCallSites[i]);
if(i > 0)
{
UINT32 prevEnd = pCallSites[i-1] + pCallSiteSizes[i-1];
@@ -519,7 +522,7 @@ void GcInfoEncoder::DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UI
}
#endif
-GcSlotId GcInfoEncoder::GetRegisterSlotId( UINT32 regNum, GcSlotFlags flags )
+template GcSlotId TGcInfoEncoder::GetRegisterSlotId( UINT32 regNum, GcSlotFlags flags )
{
// We could lookup an existing identical slot in the slot table (via some hashtable mechanism).
// We just create duplicates for now.
@@ -544,7 +547,7 @@ GcSlotId GcInfoEncoder::GetRegisterSlotId( UINT32 regNum, GcSlotFlags flags )
return newSlotId;
}
-GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcStackSlotBase spBase )
+template GcSlotId TGcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcStackSlotBase spBase )
{
// We could lookup an existing identical slot in the slot table (via some hashtable mechanism).
// We just create duplicates for now.
@@ -580,7 +583,7 @@ GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcSta
return newSlotId;
}
-void GcInfoEncoder::GrowSlotTable()
+template void TGcInfoEncoder::GrowSlotTable()
{
m_SlotTableSize *= 2;
GcSlotDesc* newSlotTable = (GcSlotDesc*) m_pAllocator->Alloc( m_SlotTableSize * sizeof(GcSlotDesc) );
@@ -593,7 +596,7 @@ void GcInfoEncoder::GrowSlotTable()
m_SlotTable = newSlotTable;
}
-void GcInfoEncoder::WriteSlotStateVector(BitStreamWriter &writer, const BitArray& vector)
+template void TGcInfoEncoder::WriteSlotStateVector(BitStreamWriter &writer, const BitArray& vector)
{
for(UINT32 i = 0; i < m_NumSlots && !m_SlotTable[i].IsUntracked(); i++)
{
@@ -604,12 +607,12 @@ void GcInfoEncoder::WriteSlotStateVector(BitStreamWriter &writer, const BitArray
}
}
-void GcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UINT32 length )
+template void TGcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UINT32 length )
{
UINT32 stopInstructionOffset = startInstructionOffset + length;
- UINT32 normStartOffset = NORMALIZE_CODE_OFFSET(startInstructionOffset);
- UINT32 normStopOffset = NORMALIZE_CODE_OFFSET(stopInstructionOffset);
+ UINT32 normStartOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(startInstructionOffset);
+ UINT32 normStopOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(stopInstructionOffset);
// Ranges must not overlap and must be passed sorted by increasing offset
_ASSERTE(
@@ -644,7 +647,7 @@ void GcInfoEncoder::DefineInterruptibleRange( UINT32 startInstructionOffset, UIN
//
// For inputs, pass zero as offset
//
-void GcInfoEncoder::SetSlotState(
+template void TGcInfoEncoder::SetSlotState(
UINT32 instructionOffset,
GcSlotId slotId,
GcSlotState slotState
@@ -665,19 +668,19 @@ void GcInfoEncoder::SetSlotState(
}
-void GcInfoEncoder::SetIsVarArg()
+template void TGcInfoEncoder::SetIsVarArg()
{
m_IsVarArg = true;
}
-void GcInfoEncoder::SetCodeLength( UINT32 length )
+template void TGcInfoEncoder::SetCodeLength( UINT32 length )
{
_ASSERTE( length > 0 );
_ASSERTE( m_CodeLength == 0 || m_CodeLength == length );
m_CodeLength = length;
}
-void GcInfoEncoder::SetPrologSize( UINT32 prologSize )
+template void TGcInfoEncoder::SetPrologSize( UINT32 prologSize )
{
_ASSERTE(prologSize != 0);
_ASSERTE(m_GSCookieValidRangeStart == 0 || m_GSCookieValidRangeStart == prologSize);
@@ -688,7 +691,7 @@ void GcInfoEncoder::SetPrologSize( UINT32 prologSize )
m_GSCookieValidRangeEnd = prologSize+1;
}
-void GcInfoEncoder::SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd )
+template void TGcInfoEncoder::SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd )
{
_ASSERTE( spOffsetGSCookie != NO_GS_COOKIE );
_ASSERTE( m_GSCookieStackSlot == NO_GS_COOKIE || m_GSCookieStackSlot == spOffsetGSCookie );
@@ -699,7 +702,7 @@ void GcInfoEncoder::SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRa
m_GSCookieValidRangeEnd = validRangeEnd;
}
-void GcInfoEncoder::SetPSPSymStackSlot( INT32 spOffsetPSPSym )
+template void TGcInfoEncoder::SetPSPSymStackSlot( INT32 spOffsetPSPSym )
{
_ASSERTE( spOffsetPSPSym != NO_PSP_SYM );
_ASSERTE( m_PSPSymStackSlot == NO_PSP_SYM || m_PSPSymStackSlot == spOffsetPSPSym );
@@ -707,7 +710,7 @@ void GcInfoEncoder::SetPSPSymStackSlot( INT32 spOffsetPSPSym )
m_PSPSymStackSlot = spOffsetPSPSym;
}
-void GcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type)
+template void TGcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type)
{
_ASSERTE( spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT);
_ASSERTE( m_GenericsInstContextStackSlot == NO_GENERICS_INST_CONTEXT || m_GenericsInstContextStackSlot == spOffsetGenericsContext );
@@ -716,10 +719,10 @@ void GcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsConte
m_contextParamType = type;
}
-void GcInfoEncoder::SetStackBaseRegister( UINT32 regNum )
+template void TGcInfoEncoder::SetStackBaseRegister( UINT32 regNum )
{
_ASSERTE( regNum != NO_STACK_BASE_REGISTER );
- _ASSERTE(DENORMALIZE_STACK_BASE_REGISTER(NORMALIZE_STACK_BASE_REGISTER(regNum)) == regNum);
+ _ASSERTE(GcInfoEncoding::DENORMALIZE_STACK_BASE_REGISTER(GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(regNum)) == regNum);
_ASSERTE( m_StackBaseRegister == NO_STACK_BASE_REGISTER || m_StackBaseRegister == regNum );
#if defined(TARGET_LOONGARCH64)
assert(regNum == 3 || 22 == regNum);
@@ -729,7 +732,7 @@ void GcInfoEncoder::SetStackBaseRegister( UINT32 regNum )
m_StackBaseRegister = regNum;
}
-void GcInfoEncoder::SetSizeOfEditAndContinuePreservedArea( UINT32 slots )
+template void TGcInfoEncoder::SetSizeOfEditAndContinuePreservedArea( UINT32 slots )
{
_ASSERTE( slots != NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA );
_ASSERTE( m_SizeOfEditAndContinuePreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA );
@@ -737,26 +740,26 @@ void GcInfoEncoder::SetSizeOfEditAndContinuePreservedArea( UINT32 slots )
}
#ifdef TARGET_ARM64
-void GcInfoEncoder::SetSizeOfEditAndContinueFixedStackFrame( UINT32 size )
+template void TGcInfoEncoder::SetSizeOfEditAndContinueFixedStackFrame( UINT32 size )
{
m_SizeOfEditAndContinueFixedStackFrame = size;
}
#endif
#ifdef TARGET_AMD64
-void GcInfoEncoder::SetWantsReportOnlyLeaf()
+template void TGcInfoEncoder::SetWantsReportOnlyLeaf()
{
m_WantsReportOnlyLeaf = true;
}
#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-void GcInfoEncoder::SetHasTailCalls()
+template void TGcInfoEncoder::SetHasTailCalls()
{
m_HasTailCalls = true;
}
#endif // TARGET_AMD64
#ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA
-void GcInfoEncoder::SetSizeOfStackOutgoingAndScratchArea( UINT32 size )
+template void TGcInfoEncoder::SetSizeOfStackOutgoingAndScratchArea( UINT32 size )
{
_ASSERTE( size != (UINT32)-1 );
_ASSERTE( m_SizeOfStackOutgoingAndScratchArea == (UINT32)-1 || m_SizeOfStackOutgoingAndScratchArea == size );
@@ -764,7 +767,7 @@ void GcInfoEncoder::SetSizeOfStackOutgoingAndScratchArea( UINT32 size )
}
#endif // FIXED_STACK_PARAMETER_SCRATCH_AREA
-void GcInfoEncoder::SetReversePInvokeFrameSlot(INT32 spOffset)
+template void TGcInfoEncoder::SetReversePInvokeFrameSlot(INT32 spOffset)
{
m_ReversePInvokeFrameSlot = spOffset;
}
@@ -819,7 +822,7 @@ struct CompareSlotDescAndIdBySlotDesc
struct CompareLifetimeTransitionsByOffsetThenSlot
{
- bool operator()(const GcInfoEncoder::LifetimeTransition& first, const GcInfoEncoder::LifetimeTransition& second)
+ bool operator()(const GcInfoEncoderExt::LifetimeTransition& first, const GcInfoEncoderExt::LifetimeTransition& second)
{
UINT32 firstOffset = first.CodeOffset;
UINT32 secondOffset = second.CodeOffset;
@@ -837,12 +840,13 @@ struct CompareLifetimeTransitionsByOffsetThenSlot
struct CompareLifetimeTransitionsBySlot
{
- bool operator()(const GcInfoEncoder::LifetimeTransition& first, const GcInfoEncoder::LifetimeTransition& second)
+ bool operator()(const GcInfoEncoderExt::LifetimeTransition& first, const GcInfoEncoderExt::LifetimeTransition& second)
{
UINT32 firstOffset = first.CodeOffset;
UINT32 secondOffset = second.CodeOffset;
- _ASSERTE(GetNormCodeOffsetChunk(firstOffset) == GetNormCodeOffsetChunk(secondOffset));
+ // FIXME: GcInfoEncoding::
+ // _ASSERTE(GetNormCodeOffsetChunk(firstOffset) == GetNormCodeOffsetChunk(secondOffset));
// Sort them by slot
if( first.SlotId != second.SlotId)
@@ -902,19 +906,19 @@ void BitStreamWriter::MemoryBlockList::Dispose(IAllocator* allocator)
#endif
}
-void GcInfoEncoder::FinalizeSlotIds()
+template void TGcInfoEncoder::FinalizeSlotIds()
{
#ifdef _DEBUG
m_IsSlotTableFrozen = TRUE;
#endif
}
-void GcInfoEncoder::Build()
+template void TGcInfoEncoder::Build()
{
#ifdef _DEBUG
_ASSERTE(m_IsSlotTableFrozen || m_NumSlots == 0);
- _ASSERTE((1 << NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2) == NUM_NORM_CODE_OFFSETS_PER_CHUNK);
+ _ASSERTE((1 << GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2) == GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK);
char methodName[256];
m_pCorJitInfo->printMethodName(m_pMethodInfo->ftn, methodName, sizeof(methodName));
@@ -939,7 +943,7 @@ void GcInfoEncoder::Build()
BOOL slimHeader = (!m_IsVarArg && !hasGSCookie && (m_PSPSymStackSlot == NO_PSP_SYM) &&
!hasContextParamType && (m_InterruptibleRanges.Count() == 0) && !hasReversePInvokeFrame &&
- ((m_StackBaseRegister == NO_STACK_BASE_REGISTER) || (NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister) == 0))) &&
+ ((m_StackBaseRegister == NO_STACK_BASE_REGISTER) || (GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister) == 0))) &&
#ifdef TARGET_AMD64
!m_WantsReportOnlyLeaf &&
#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
@@ -984,8 +988,8 @@ void GcInfoEncoder::Build()
}
_ASSERTE( m_CodeLength > 0 );
- _ASSERTE(DENORMALIZE_CODE_LENGTH(NORMALIZE_CODE_LENGTH(m_CodeLength)) == m_CodeLength);
- GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_CODE_LENGTH(m_CodeLength), CODE_LENGTH_ENCBASE, CodeLengthSize);
+ _ASSERTE(GcInfoEncoding::DENORMALIZE_CODE_LENGTH(GcInfoEncoding::NORMALIZE_CODE_LENGTH(m_CodeLength)) == m_CodeLength);
+ GCINFO_WRITE_VARL_U(m_Info1, GcInfoEncoding::NORMALIZE_CODE_LENGTH(m_CodeLength), GcInfoEncoding::CODE_LENGTH_ENCBASE, CodeLengthSize);
if(hasGSCookie)
{
@@ -999,13 +1003,13 @@ void GcInfoEncoder::Build()
_ASSERTE(intersectionStart > 0 && intersectionStart < m_CodeLength);
_ASSERTE(intersectionEnd > 0 && intersectionEnd <= m_CodeLength);
_ASSERTE(intersectionStart <= intersectionEnd);
- UINT32 normPrologSize = NORMALIZE_CODE_OFFSET(intersectionStart);
- UINT32 normEpilogSize = NORMALIZE_CODE_OFFSET(m_CodeLength) - NORMALIZE_CODE_OFFSET(intersectionEnd);
+ UINT32 normPrologSize = GcInfoEncoding::NORMALIZE_CODE_OFFSET(intersectionStart);
+ UINT32 normEpilogSize = GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength) - GcInfoEncoding::NORMALIZE_CODE_OFFSET(intersectionEnd);
_ASSERTE(normPrologSize > 0 && normPrologSize < m_CodeLength);
_ASSERTE(normEpilogSize < m_CodeLength);
- GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize);
- GCINFO_WRITE_VARL_U(m_Info1, normEpilogSize, NORM_EPILOG_SIZE_ENCBASE, ProEpilogSize);
+ GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, GcInfoEncoding::NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize);
+ GCINFO_WRITE_VARL_U(m_Info1, normEpilogSize, GcInfoEncoding::NORM_EPILOG_SIZE_ENCBASE, ProEpilogSize);
}
else if (hasContextParamType)
{
@@ -1013,10 +1017,10 @@ void GcInfoEncoder::Build()
// Save the prolog size, to be used for determining when it is not safe
// to report generics param context and the security object
_ASSERTE(m_GSCookieValidRangeStart > 0 && m_GSCookieValidRangeStart < m_CodeLength);
- UINT32 normPrologSize = NORMALIZE_CODE_OFFSET(m_GSCookieValidRangeStart);
+ UINT32 normPrologSize = GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_GSCookieValidRangeStart);
_ASSERTE(normPrologSize > 0 && normPrologSize < m_CodeLength);
- GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize);
+ GCINFO_WRITE_VARL_U(m_Info1, normPrologSize-1, GcInfoEncoding::NORM_PROLOG_SIZE_ENCBASE, ProEpilogSize);
}
// Encode the offset to the GS cookie.
@@ -1029,7 +1033,7 @@ void GcInfoEncoder::Build()
));
#endif
- GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_GSCookieStackSlot), GS_COOKIE_STACK_SLOT_ENCBASE, GsCookieSize);
+ GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_GSCookieStackSlot), GcInfoEncoding::GS_COOKIE_STACK_SLOT_ENCBASE, GsCookieSize);
}
@@ -1041,7 +1045,7 @@ void GcInfoEncoder::Build()
#ifdef _DEBUG
LOG((LF_GCINFO, LL_INFO1000, "Parent PSP at " FMT_STK "\n", DBG_STK(m_PSPSymStackSlot)));
#endif
- GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_PSPSymStackSlot), PSP_SYM_STACK_SLOT_ENCBASE, PspSymSize);
+ GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_PSPSymStackSlot), GcInfoEncoding::PSP_SYM_STACK_SLOT_ENCBASE, PspSymSize);
}
// Encode the offset to the generics type context.
@@ -1053,7 +1057,7 @@ void GcInfoEncoder::Build()
DBG_STK(m_GenericsInstContextStackSlot)
));
#endif
- GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_GenericsInstContextStackSlot), GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE, GenericsCtxSize);
+ GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_GenericsInstContextStackSlot), GcInfoEncoding::GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE, GenericsCtxSize);
}
if(!slimHeader && (m_StackBaseRegister != NO_STACK_BASE_REGISTER))
@@ -1063,28 +1067,28 @@ void GcInfoEncoder::Build()
#elif defined(TARGET_RISCV64)
assert(m_StackBaseRegister == 8 || 2 == m_StackBaseRegister);
#endif
- GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister), STACK_BASE_REGISTER_ENCBASE, StackBaseSize);
+ GCINFO_WRITE_VARL_U(m_Info1, GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister), GcInfoEncoding::STACK_BASE_REGISTER_ENCBASE, StackBaseSize);
}
if (m_SizeOfEditAndContinuePreservedArea != NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA)
{
- GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinuePreservedArea, SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE, EncInfoSize);
+ GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinuePreservedArea, GcInfoEncoding::SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE, EncInfoSize);
#ifdef TARGET_ARM64
- GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinueFixedStackFrame, SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE, EncInfoSize);
+ GCINFO_WRITE_VARL_U(m_Info1, m_SizeOfEditAndContinueFixedStackFrame, GcInfoEncoding::SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE, EncInfoSize);
#endif
}
if (hasReversePInvokeFrame)
{
_ASSERTE(!slimHeader);
- GCINFO_WRITE_VARL_S(m_Info1, NORMALIZE_STACK_SLOT(m_ReversePInvokeFrameSlot), REVERSE_PINVOKE_FRAME_ENCBASE, ReversePInvokeFrameSize);
+ GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_ReversePInvokeFrameSlot), GcInfoEncoding::REVERSE_PINVOKE_FRAME_ENCBASE, ReversePInvokeFrameSize);
}
#ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA
if (!slimHeader)
{
_ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 );
- GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_SIZE_OF_STACK_AREA(m_SizeOfStackOutgoingAndScratchArea), SIZE_OF_STACK_AREA_ENCBASE, FixedAreaSize);
+ GCINFO_WRITE_VARL_U(m_Info1, GcInfoEncoding::NORMALIZE_SIZE_OF_STACK_AREA(m_SizeOfStackOutgoingAndScratchArea), GcInfoEncoding::SIZE_OF_STACK_AREA_ENCBASE, FixedAreaSize);
}
#endif // FIXED_STACK_PARAMETER_SCRATCH_AREA
@@ -1117,12 +1121,12 @@ void GcInfoEncoder::Build()
UINT32 callSite = m_pCallSites[callSiteIndex];
callSite += m_pCallSiteSizes[callSiteIndex];
- _ASSERTE(DENORMALIZE_CODE_OFFSET(NORMALIZE_CODE_OFFSET(callSite)) == callSite);
- UINT32 normOffset = NORMALIZE_CODE_OFFSET(callSite);
+ _ASSERTE(GcInfoEncoding::DENORMALIZE_CODE_OFFSET(GcInfoEncoding::NORMALIZE_CODE_OFFSET(callSite)) == callSite);
+ UINT32 normOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(callSite);
m_pCallSites[numCallSites++] = normOffset;
}
- GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_NUM_SAFE_POINTS(numCallSites), NUM_SAFE_POINTS_ENCBASE, NumCallSitesSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numCallSites, GcInfoEncoding::NUM_SAFE_POINTS_ENCBASE, NumCallSitesSize);
m_NumCallSites = numCallSites;
#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED
@@ -1132,7 +1136,7 @@ void GcInfoEncoder::Build()
}
else
{
- GCINFO_WRITE_VARL_U(m_Info1, NORMALIZE_NUM_INTERRUPTIBLE_RANGES(numInterruptibleRanges), NUM_INTERRUPTIBLE_RANGES_ENCBASE, NumRangesSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numInterruptibleRanges, GcInfoEncoding::NUM_INTERRUPTIBLE_RANGES_ENCBASE, NumRangesSize);
}
@@ -1142,7 +1146,7 @@ void GcInfoEncoder::Build()
// Encode call site offsets
///////////////////////////////////////////////////////////////////////
- UINT32 numBitsPerOffset = CeilOfLog2(NORMALIZE_CODE_OFFSET(m_CodeLength));
+ UINT32 numBitsPerOffset = CeilOfLog2(GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength));
for(UINT32 callSiteIndex = 0; callSiteIndex < m_NumCallSites; callSiteIndex++)
{
@@ -1173,9 +1177,9 @@ void GcInfoEncoder::Build()
lastStopOffset = normStopOffset;
- GCINFO_WRITE_VARL_U(m_Info1, normStartDelta, INTERRUPTIBLE_RANGE_DELTA1_ENCBASE, RangeSize);
+ GCINFO_WRITE_VARL_U(m_Info1, normStartDelta, GcInfoEncoding::INTERRUPTIBLE_RANGE_DELTA1_ENCBASE, RangeSize);
- GCINFO_WRITE_VARL_U(m_Info1, normStopDelta-1, INTERRUPTIBLE_RANGE_DELTA2_ENCBASE, RangeSize);
+ GCINFO_WRITE_VARL_U(m_Info1, normStopDelta-1, GcInfoEncoding::INTERRUPTIBLE_RANGE_DELTA2_ENCBASE, RangeSize);
}
}
@@ -1260,14 +1264,14 @@ void GcInfoEncoder::Build()
#endif
}
-#if CODE_OFFSETS_NEED_NORMALIZATION
- // Do a pass to normalize transition offsets
- for(pCurrent = pTransitions; pCurrent < pEndTransitions; pCurrent++)
- {
- _ASSERTE(pCurrent->CodeOffset <= m_CodeLength);
- pCurrent->CodeOffset = NORMALIZE_CODE_OFFSET(pCurrent->CodeOffset);
+ if (GcInfoEncoding::CODE_OFFSETS_NEED_NORMALIZATION) {
+ // Do a pass to normalize transition offsets
+ for(pCurrent = pTransitions; pCurrent < pEndTransitions; pCurrent++)
+ {
+ _ASSERTE(pCurrent->CodeOffset <= m_CodeLength);
+ pCurrent->CodeOffset = GcInfoEncoding::NORMALIZE_CODE_OFFSET(pCurrent->CodeOffset);
+ }
}
-#endif
///////////////////////////////////////////////////////////////////
// Find out which slots are really used
@@ -1444,7 +1448,7 @@ void GcInfoEncoder::Build()
if (numRegisters)
{
GCINFO_WRITE(m_Info1, 1, 1, FlagsSize);
- GCINFO_WRITE_VARL_U(m_Info1, numRegisters, NUM_REGISTERS_ENCBASE, NumRegsSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numRegisters, GcInfoEncoding::NUM_REGISTERS_ENCBASE, NumRegsSize);
}
else
{
@@ -1453,8 +1457,8 @@ void GcInfoEncoder::Build()
if (numStackSlots || numUntrackedSlots)
{
GCINFO_WRITE(m_Info1, 1, 1, FlagsSize);
- GCINFO_WRITE_VARL_U(m_Info1, numStackSlots, NUM_STACK_SLOTS_ENCBASE, NumStackSize);
- GCINFO_WRITE_VARL_U(m_Info1, numUntrackedSlots, NUM_UNTRACKED_SLOTS_ENCBASE, NumUntrackedSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numStackSlots, GcInfoEncoding::NUM_STACK_SLOTS_ENCBASE, NumStackSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numUntrackedSlots, GcInfoEncoding::NUM_UNTRACKED_SLOTS_ENCBASE, NumUntrackedSize);
}
else
{
@@ -1478,8 +1482,8 @@ void GcInfoEncoder::Build()
_ASSERTE(pSlotDesc->IsRegister());
// Encode slot identification
- UINT32 currentNormRegNum = NORMALIZE_REGISTER(pSlotDesc->Slot.RegisterNumber);
- GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, REGISTER_ENCBASE, RegSlotSize);
+ UINT32 currentNormRegNum = pSlotDesc->Slot.RegisterNumber;
+ GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, GcInfoEncoding::REGISTER_ENCBASE, RegSlotSize);
GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, RegSlotSize);
for(UINT32 j = 1; j < numRegisters; j++)
@@ -1495,17 +1499,17 @@ void GcInfoEncoder::Build()
while(pSlotDesc->IsDeleted());
_ASSERTE(pSlotDesc->IsRegister());
- currentNormRegNum = NORMALIZE_REGISTER(pSlotDesc->Slot.RegisterNumber);
+ currentNormRegNum = pSlotDesc->Slot.RegisterNumber;
if(lastFlags != GC_SLOT_IS_REGISTER)
{
- GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, REGISTER_ENCBASE, RegSlotSize);
+ GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum, GcInfoEncoding::REGISTER_ENCBASE, RegSlotSize);
GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, RegSlotSize);
}
else
{
_ASSERTE(pSlotDesc->Flags == GC_SLOT_IS_REGISTER);
- GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum - lastNormRegNum - 1, REGISTER_DELTA_ENCBASE, RegSlotSize);
+ GCINFO_WRITE_VARL_U(m_Info1, currentNormRegNum - lastNormRegNum - 1, GcInfoEncoding::REGISTER_DELTA_ENCBASE, RegSlotSize);
}
}
}
@@ -1525,8 +1529,8 @@ void GcInfoEncoder::Build()
// Encode slot identification
_ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0);
GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, StackSlotSize);
- INT32 currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
- GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, StackSlotSize);
+ INT32 currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
+ GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, StackSlotSize);
GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, StackSlotSize);
@@ -1544,20 +1548,20 @@ void GcInfoEncoder::Build()
_ASSERTE(!pSlotDesc->IsRegister());
_ASSERTE(!pSlotDesc->IsUntracked());
- currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
+ currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
_ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0);
GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, StackSlotSize);
if(lastFlags != GC_SLOT_BASE)
{
- GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, StackSlotSize);
+ GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, StackSlotSize);
GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, StackSlotSize);
}
else
{
_ASSERTE(pSlotDesc->Flags == GC_SLOT_BASE);
- GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, STACK_SLOT_DELTA_ENCBASE, StackSlotSize);
+ GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, GcInfoEncoding::STACK_SLOT_DELTA_ENCBASE, StackSlotSize);
}
}
}
@@ -1577,8 +1581,8 @@ void GcInfoEncoder::Build()
// Encode slot identification
_ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0);
GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, UntrackedSlotSize);
- INT32 currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
- GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, UntrackedSlotSize);
+ INT32 currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
+ GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, UntrackedSlotSize);
GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, UntrackedSlotSize);
@@ -1596,20 +1600,20 @@ void GcInfoEncoder::Build()
_ASSERTE(!pSlotDesc->IsRegister());
_ASSERTE(pSlotDesc->IsUntracked());
- currentNormStackSlot = NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
+ currentNormStackSlot = GcInfoEncoding::NORMALIZE_STACK_SLOT(pSlotDesc->Slot.Stack.SpOffset);
_ASSERTE((pSlotDesc->Slot.Stack.Base & ~3) == 0);
GCINFO_WRITE(m_Info1, pSlotDesc->Slot.Stack.Base, 2, UntrackedSlotSize);
if(lastFlags != GC_SLOT_UNTRACKED)
{
- GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, STACK_SLOT_ENCBASE, UntrackedSlotSize);
+ GCINFO_WRITE_VARL_S(m_Info1, currentNormStackSlot, GcInfoEncoding::STACK_SLOT_ENCBASE, UntrackedSlotSize);
GCINFO_WRITE(m_Info1, pSlotDesc->Flags, 2, UntrackedSlotSize);
}
else
{
_ASSERTE(pSlotDesc->Flags == GC_SLOT_UNTRACKED);
- GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, STACK_SLOT_DELTA_ENCBASE, UntrackedSlotSize);
+ GCINFO_WRITE_VARL_U(m_Info1, currentNormStackSlot - lastNormStackSlot, GcInfoEncoding::STACK_SLOT_DELTA_ENCBASE, UntrackedSlotSize);
}
}
}
@@ -1701,13 +1705,13 @@ void GcInfoEncoder::Build()
for (LiveStateHashTable::KeyIterator iter = hashMap.Begin(), end = hashMap.End(); !iter.Equal(end); iter.Next())
{
largestSetOffset = sizeofSets;
- sizeofSets += SizeofSlotStateVarLengthVector(*iter.Get(), LIVESTATE_RLE_SKIP_ENCBASE, LIVESTATE_RLE_RUN_ENCBASE);
+ sizeofSets += SizeofSlotStateVarLengthVector(*iter.Get(), GcInfoEncoding::LIVESTATE_RLE_SKIP_ENCBASE, GcInfoEncoding::LIVESTATE_RLE_RUN_ENCBASE);
}
// Now that we know the largest offset, we can figure out how much the indirection
// will cost us and commit
UINT32 numBitsPerPointer = ((largestSetOffset < 2) ? 1 : CeilOfLog2(largestSetOffset + 1));
- const size_t sizeofEncodedNumBitsPerPointer = BitStreamWriter::SizeofVarLengthUnsigned(numBitsPerPointer, POINTER_SIZE_ENCBASE);
+ const size_t sizeofEncodedNumBitsPerPointer = BitStreamWriter::SizeofVarLengthUnsigned(numBitsPerPointer, GcInfoEncoding::POINTER_SIZE_ENCBASE);
const size_t sizeofNoIndirection = m_NumCallSites * (numRegisters + numStackSlots);
const size_t sizeofIndirection = sizeofEncodedNumBitsPerPointer // Encode the pointer sizes
+ (m_NumCallSites * numBitsPerPointer) // Encode the pointers
@@ -1723,14 +1727,14 @@ void GcInfoEncoder::Build()
{
// we are using an indirection
GCINFO_WRITE(m_Info1, 1, 1, FlagsSize);
- GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer - 1, POINTER_SIZE_ENCBASE, CallSiteStateSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer - 1, GcInfoEncoding::POINTER_SIZE_ENCBASE, CallSiteStateSize);
// Now encode the live sets and record the real offset
for (LiveStateHashTable::KeyIterator iter = hashMap.Begin(), end = hashMap.End(); !iter.Equal(end); iter.Next())
{
_ASSERTE(FitsIn(m_Info2.GetBitCount()));
iter.SetValue((UINT32)m_Info2.GetBitCount());
- GCINFO_WRITE_VAR_VECTOR(m_Info2, *iter.Get(), LIVESTATE_RLE_SKIP_ENCBASE, LIVESTATE_RLE_RUN_ENCBASE, CallSiteStateSize);
+ GCINFO_WRITE_VAR_VECTOR(m_Info2, *iter.Get(), GcInfoEncoding::LIVESTATE_RLE_SKIP_ENCBASE, GcInfoEncoding::LIVESTATE_RLE_RUN_ENCBASE, CallSiteStateSize);
}
_ASSERTE(sizeofSets == m_Info2.GetBitCount());
@@ -1843,7 +1847,7 @@ void GcInfoEncoder::Build()
InterruptibleRange *pRange = &pRanges[i];
totalInterruptibleLength += pRange->NormStopOffset - pRange->NormStartOffset;
}
- _ASSERTE(totalInterruptibleLength <= NORMALIZE_CODE_OFFSET(m_CodeLength));
+ _ASSERTE(totalInterruptibleLength <= GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength));
liveState.ClearAll();
// Re-use couldBeLive
@@ -1944,14 +1948,14 @@ void GcInfoEncoder::Build()
pEndTransitions = pNextFree;
#else
- UINT32 totalInterruptibleLength = NORMALIZE_CODE_OFFSET(m_CodeLength);
+ UINT32 totalInterruptibleLength = GcInfoEncoding::NORMALIZE_CODE_OFFSET(m_CodeLength);
#endif //PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED
//
// Initialize chunk pointers
//
- UINT32 numChunks = (totalInterruptibleLength + NUM_NORM_CODE_OFFSETS_PER_CHUNK - 1) / NUM_NORM_CODE_OFFSETS_PER_CHUNK;
+ UINT32 numChunks = (totalInterruptibleLength + GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK - 1) / GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK;
_ASSERTE(numChunks > 0);
size_t* pChunkPointers = (size_t*) m_pAllocator->Alloc(numChunks*sizeof(size_t));
@@ -2008,12 +2012,12 @@ void GcInfoEncoder::Build()
pChunkPointers[currentChunk] = m_Info2.GetBitCount() + 1;
// Write couldBeLive slot map
- GCINFO_WRITE_VAR_VECTOR(m_Info2, couldBeLive, LIVESTATE_RLE_SKIP_ENCBASE, LIVESTATE_RLE_RUN_ENCBASE, ChunkMaskSize);
+ GCINFO_WRITE_VAR_VECTOR(m_Info2, couldBeLive, GcInfoEncoding::LIVESTATE_RLE_SKIP_ENCBASE, GcInfoEncoding::LIVESTATE_RLE_RUN_ENCBASE, ChunkMaskSize);
LOG((LF_GCINFO, LL_INFO100000,
"Chunk %d couldBeLive (%04x-%04x):\n", currentChunk,
- currentChunk * NUM_NORM_CODE_OFFSETS_PER_CHUNK,
- ((currentChunk + 1) * NUM_NORM_CODE_OFFSETS_PER_CHUNK) - 1
+ currentChunk * GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK,
+ ((currentChunk + 1) * GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK) - 1
));
// Write final state
@@ -2039,7 +2043,7 @@ void GcInfoEncoder::Build()
}
// Write transitions offsets
- UINT32 normChunkBaseCodeOffset = currentChunk * NUM_NORM_CODE_OFFSETS_PER_CHUNK;
+ UINT32 normChunkBaseCodeOffset = currentChunk * GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK;
LifetimeTransition* pT = pCurrent - numTransitionsInCurrentChunk;
@@ -2068,10 +2072,10 @@ void GcInfoEncoder::Build()
// Don't encode transitions at offset 0 as they are useless
if(normCodeOffsetDelta)
{
- _ASSERTE(normCodeOffsetDelta < NUM_NORM_CODE_OFFSETS_PER_CHUNK);
+ _ASSERTE(normCodeOffsetDelta < GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK);
GCINFO_WRITE(m_Info2, 1, 1, ChunkTransitionSize);
- GCINFO_WRITE(m_Info2, normCodeOffsetDelta, NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2, ChunkTransitionSize);
+ GCINFO_WRITE(m_Info2, normCodeOffsetDelta, GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2, ChunkTransitionSize);
#ifdef MEASURE_GCINFO
m_CurrentMethodSize.NumTransitions++;
@@ -2105,7 +2109,7 @@ void GcInfoEncoder::Build()
}
UINT32 numBitsPerPointer = CeilOfLog2(largestPointer + 1);
- GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer, POINTER_SIZE_ENCBASE, ChunkPtrSize);
+ GCINFO_WRITE_VARL_U(m_Info1, numBitsPerPointer, GcInfoEncoding::POINTER_SIZE_ENCBASE, ChunkPtrSize);
if(numBitsPerPointer)
{
@@ -2173,7 +2177,7 @@ lExitSuccess:;
#endif
}
-void GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector,
+template <typename GcInfoEncoding> void TGcInfoEncoder<GcInfoEncoding>::SizeofSlotStateVarLengthVector(const BitArray &vector,
UINT32 baseSkip,
UINT32 baseRun,
UINT32 *pSizeofSimple,
@@ -2248,7 +2252,7 @@ void GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector,
*pSizeofRLENeg = sizeofRLENeg;
}
-UINT32 GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector,
+template <typename GcInfoEncoding> UINT32 TGcInfoEncoder<GcInfoEncoding>::SizeofSlotStateVarLengthVector(const BitArray &vector,
UINT32 baseSkip,
UINT32 baseRun)
{
@@ -2265,7 +2269,7 @@ UINT32 GcInfoEncoder::SizeofSlotStateVarLengthVector(const BitArray &vector,
return sizeofRLENeg;
}
-UINT32 GcInfoEncoder::WriteSlotStateVarLengthVector(BitStreamWriter &writer,
+template <typename GcInfoEncoding> UINT32 TGcInfoEncoder<GcInfoEncoding>::WriteSlotStateVarLengthVector(BitStreamWriter &writer,
const BitArray &vector,
UINT32 baseSkip,
UINT32 baseRun)
@@ -2355,7 +2359,7 @@ UINT32 GcInfoEncoder::WriteSlotStateVarLengthVector(BitStreamWriter &writer,
}
-void GcInfoEncoder::EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTransitions,
+template <typename GcInfoEncoding> void TGcInfoEncoder<GcInfoEncoding>::EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTransitions,
size_t* pNumTransitions,
LifetimeTransition** ppEndTransitions)
{
@@ -2410,7 +2414,7 @@ void GcInfoEncoder::EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTrans
// Write encoded information to its final destination and frees temporary buffers.
// The encoder shouldn't be used anymore after calling this method.
//
-BYTE* GcInfoEncoder::Emit()
+template <typename GcInfoEncoding> BYTE* TGcInfoEncoder<GcInfoEncoding>::Emit()
{
size_t cbGcInfoSize = m_Info1.GetByteCount() +
m_Info2.GetByteCount();
@@ -2439,13 +2443,13 @@ BYTE* GcInfoEncoder::Emit()
return destBuffer;
}
-void * GcInfoEncoder::eeAllocGCInfo (size_t blockSize)
+template <typename GcInfoEncoding> void * TGcInfoEncoder<GcInfoEncoding>::eeAllocGCInfo (size_t blockSize)
{
m_BlockSize = blockSize;
return m_pCorJitInfo->allocGCInfo(blockSize);
}
-size_t GcInfoEncoder::GetEncodedGCInfoSize() const
+template <typename GcInfoEncoding> size_t TGcInfoEncoder<GcInfoEncoding>::GetEncodedGCInfoSize() const
{
return m_BlockSize;
}
@@ -2616,3 +2620,5 @@ int BitStreamWriter::EncodeVarLengthSigned( SSIZE_T n, UINT32 base )
}
}
+// Instantiate the encoder so other files can use it
+template class TGcInfoEncoder<TargetGcInfoEncoding>;
diff --git a/src/coreclr/inc/check.h b/src/coreclr/inc/check.h
index 21d717c13e6bb7..5a7218cc652d77 100644
--- a/src/coreclr/inc/check.h
+++ b/src/coreclr/inc/check.h
@@ -723,7 +723,9 @@ CHECK CheckOverflow(UINT64 value1, UINT64 value2);
#ifdef __APPLE__
CHECK CheckOverflow(SIZE_T value1, SIZE_T value2);
#endif
+#ifndef __wasm__
CHECK CheckOverflow(PTR_CVOID address, UINT offset);
+#endif
#if defined(_MSC_VER)
CHECK CheckOverflow(const void *address, ULONG offset);
#endif
diff --git a/src/coreclr/inc/check.inl b/src/coreclr/inc/check.inl
index 34a2956d1be6e2..b0f65c5d218bbd 100644
--- a/src/coreclr/inc/check.inl
+++ b/src/coreclr/inc/check.inl
@@ -156,7 +156,7 @@ inline CHECK CheckAligned(UINT64 value, UINT alignment)
CHECK_OK;
}
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__wasm__)
inline CHECK CheckAligned(SIZE_T value, UINT alignment)
{
STATIC_CONTRACT_WRAPPER;
@@ -237,7 +237,7 @@ inline CHECK CheckOverflow(const void *address, UINT64 offset)
CHECK_OK;
}
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__wasm__)
inline CHECK CheckOverflow(const void *address, SIZE_T offset)
{
CHECK((UINT64) address + offset >= (UINT64) address);
@@ -316,10 +316,11 @@ inline CHECK CheckUnderflow(const void *address, UINT64 offset)
CHECK_OK;
}
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__wasm__)
inline CHECK CheckUnderflow(const void *address, SIZE_T offset)
{
-#if POINTER_BITS == 32
+ // SIZE_T is 32bit on wasm32
+#if !defined(__wasm__) && POINTER_BITS == 32
CHECK(offset >> 32 == 0);
CHECK((UINT) (SIZE_T) address - (UINT) offset <= (UINT) (SIZE_T) address);
#else
diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index bf28ce3fc3e757..8bdde4c7baa642 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -688,6 +688,9 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame"
#endif
RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum decimal width, in bits, that Vector is allowed to be. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10)
+#if defined(TARGET_AMD64) || defined(TARGET_X86)
+RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0, "The maximum decimal width, in bits, of fixed-width vectors that may be considered hardware accelerated. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10)
+#endif // defined(TARGET_AMD64) || defined(TARGET_X86)
//
// Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h
diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h
index 2d935a95317e69..bcd4427538babe 100644
--- a/src/coreclr/inc/clrnt.h
+++ b/src/coreclr/inc/clrnt.h
@@ -501,4 +501,48 @@ RtlVirtualUnwind(
#endif // TARGET_RISCV64
+#ifdef TARGET_WASM
+//
+// Define unwind information flags.
+//
+
+#define UNW_FLAG_NHANDLER 0x0 /* any handler */
+#define UNW_FLAG_EHANDLER 0x1 /* filter handler */
+#define UNW_FLAG_UHANDLER 0x2 /* unwind handler */
+
+PEXCEPTION_ROUTINE
+RtlVirtualUnwind (
+ _In_ DWORD HandlerType,
+ _In_ DWORD ImageBase,
+ _In_ DWORD ControlPc,
+ _In_ PRUNTIME_FUNCTION FunctionEntry,
+ __inout PT_CONTEXT ContextRecord,
+ _Out_ PVOID *HandlerData,
+ _Out_ PDWORD EstablisherFrame,
+ __inout_opt PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers
+ );
+
+FORCEINLINE
+ULONG
+RtlpGetFunctionEndAddress (
+ _In_ PT_RUNTIME_FUNCTION FunctionEntry,
+ _In_ TADDR ImageBase
+ )
+{
+ _ASSERTE(!"The function RtlpGetFunctionEndAddress is not implemented on wasm");
+ return 0;
+}
+
+#define RUNTIME_FUNCTION__BeginAddress(FunctionEntry) ((FunctionEntry)->BeginAddress)
+#define RUNTIME_FUNCTION__SetBeginAddress(FunctionEntry,address) ((FunctionEntry)->BeginAddress = (address))
+
+#define RUNTIME_FUNCTION__EndAddress(FunctionEntry, ImageBase) (RtlpGetFunctionEndAddress(FunctionEntry, (ULONG64)(ImageBase)))
+
+#define RUNTIME_FUNCTION__SetUnwindInfoAddress(prf,address) do { (prf)->UnwindData = (address); } while (0)
+
+typedef struct _UNWIND_INFO {
+ // dummy
+} UNWIND_INFO, *PUNWIND_INFO;
+#endif
+
#endif // CLRNT_H_
diff --git a/src/coreclr/inc/clrtypes.h b/src/coreclr/inc/clrtypes.h
index 9094e4932a2527..b1990054c48738 100644
--- a/src/coreclr/inc/clrtypes.h
+++ b/src/coreclr/inc/clrtypes.h
@@ -338,7 +338,7 @@ inline UINT64 AlignUp(UINT64 value, UINT alignment)
return (value+alignment-1)&~(UINT64)(alignment-1);
}
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__wasm__)
inline SIZE_T AlignUp(SIZE_T value, UINT alignment)
{
STATIC_CONTRACT_LEAF;
@@ -399,13 +399,13 @@ inline UINT AlignmentPad(UINT64 value, UINT alignment)
return (UINT) (AlignUp(value, alignment) - value);
}
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__wasm__)
inline UINT AlignmentPad(SIZE_T value, UINT alignment)
{
STATIC_CONTRACT_WRAPPER;
return (UINT) (AlignUp(value, alignment) - value);
}
-#endif // __APPLE__
+#endif // __APPLE__ || __wasm__
inline UINT AlignmentTrim(UINT value, UINT alignment)
{
@@ -432,7 +432,7 @@ inline UINT AlignmentTrim(UINT64 value, UINT alignment)
return ((UINT)value)&(alignment-1);
}
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__wasm__)
inline UINT AlignmentTrim(SIZE_T value, UINT alignment)
{
STATIC_CONTRACT_LEAF;
diff --git a/src/coreclr/inc/corcompile.h b/src/coreclr/inc/corcompile.h
index 845b72465c3439..57ca94832e6be7 100644
--- a/src/coreclr/inc/corcompile.h
+++ b/src/coreclr/inc/corcompile.h
@@ -56,6 +56,11 @@ inline ReadyToRunCrossModuleInlineFlags operator &( const ReadyToRunCrossModuleI
return static_cast(static_cast(left) & static_cast(right));
}
+#ifdef TARGET_WASM
+// TODO: Determine why PTR_RUNTIME_FUNCTION was previously defined only for TARGET_X86 and unify the definitions if possible.
+typedef DPTR(RUNTIME_FUNCTION) PTR_RUNTIME_FUNCTION;
+#endif
+
#ifdef TARGET_X86
typedef DPTR(RUNTIME_FUNCTION) PTR_RUNTIME_FUNCTION;
diff --git a/src/coreclr/inc/cordebuginfo.h b/src/coreclr/inc/cordebuginfo.h
index b3125060f308eb..b6979c52f3ac5c 100644
--- a/src/coreclr/inc/cordebuginfo.h
+++ b/src/coreclr/inc/cordebuginfo.h
@@ -213,6 +213,8 @@ class ICorDebugInfo
REGNUM_T5,
REGNUM_T6,
REGNUM_PC,
+#elif TARGET_WASM
+ REGNUM_PC, // wasm doesn't have registers
#else
PORTABILITY_WARNING("Register numbers not defined on this platform")
#endif
diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h
index b7de9711f07f79..f6eb983953f95e 100644
--- a/src/coreclr/inc/corjitflags.h
+++ b/src/coreclr/inc/corjitflags.h
@@ -64,10 +64,6 @@ class CORJIT_FLAGS
CORJIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention
#endif
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
- CORJIT_FLAG_VECTOR512_THROTTLING = 31, // On x86/x64, 512-bit vector usage may incur CPU frequency throttling
-#endif
-
};
CORJIT_FLAGS()
diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h
index c3c1f97ddeedea..36081dee778ab4 100644
--- a/src/coreclr/inc/crosscomp.h
+++ b/src/coreclr/inc/crosscomp.h
@@ -721,6 +721,8 @@ typedef struct _T_KNONVOLATILE_CONTEXT_POINTERS {
#define DAC_CS_NATIVE_DATA_SIZE 48
#elif defined(TARGET_HAIKU) && defined(TARGET_AMD64)
#define DAC_CS_NATIVE_DATA_SIZE 56
+#elif defined(TARGET_WASM)
+#define DAC_CS_NATIVE_DATA_SIZE 76
#else
#warning
#error DAC_CS_NATIVE_DATA_SIZE is not defined for this architecture. This should be same value as PAL_CS_NATIVE_DATA_SIZE (aka sizeof(PAL_CS_NATIVE_DATA)).
diff --git a/src/coreclr/inc/debugmacros.h b/src/coreclr/inc/debugmacros.h
index 35592ea36b1214..d5ed757ae77aab 100644
--- a/src/coreclr/inc/debugmacros.h
+++ b/src/coreclr/inc/debugmacros.h
@@ -13,6 +13,7 @@
#include "stacktrace.h"
#include "debugmacrosext.h"
#include "palclr.h"
+#include <minipal/debugbreak.h>
#undef _ASSERTE
#undef VERIFY
diff --git a/src/coreclr/inc/debugreturn.h b/src/coreclr/inc/debugreturn.h
index d052364ff89057..7047b19ed0f3c0 100644
--- a/src/coreclr/inc/debugreturn.h
+++ b/src/coreclr/inc/debugreturn.h
@@ -96,11 +96,13 @@ typedef __SafeToReturn __ReturnOK;
// build. (And, in fastchecked, there is no penalty at all.)
//
#ifdef _MSC_VER
-#define return if (0 && __ReturnOK::safe_to_return()) { } else return
+#define debug_instrumented_return if (0 && __ReturnOK::safe_to_return()) { } else return
#else // _MSC_VER
-#define return for (;1;__ReturnOK::safe_to_return()) return
+#define debug_instrumented_return for (;1;__ReturnOK::safe_to_return()) return
#endif // _MSC_VER
+#define return debug_instrumented_return
+
#define DEBUG_ASSURE_NO_RETURN_BEGIN(arg) { typedef __YouCannotUseAReturnStatementHere __ReturnOK; if (0 && __ReturnOK::used()) { } else {
#define DEBUG_ASSURE_NO_RETURN_END(arg) } }
diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h
index 0b51833ee19d5e..2e3b8e7b6b905c 100644
--- a/src/coreclr/inc/gcinfodecoder.h
+++ b/src/coreclr/inc/gcinfodecoder.h
@@ -465,6 +465,8 @@ struct GcSlotDesc
GcSlotFlags Flags;
};
+
+template <typename GcInfoEncoding>
class GcSlotDecoder
{
public:
@@ -507,12 +509,13 @@ class GcSlotDecoder
};
#ifdef USE_GC_INFO_DECODER
-class GcInfoDecoder
+template <typename GcInfoEncoding>
+class TGcInfoDecoder
{
public:
// If you are not interested in interruptibility or gc lifetime information, pass 0 as instructionOffset
- GcInfoDecoder(
+ TGcInfoDecoder(
GCInfoToken gcInfoToken,
GcInfoDecoderFlags flags = DECODE_EVERYTHING,
UINT32 instructionOffset = 0
@@ -532,7 +535,7 @@ class GcInfoDecoder
// This is used for gcinfodumper
bool IsSafePoint(UINT32 codeOffset);
- typedef void EnumerateSafePointsCallback (GcInfoDecoder* decoder, UINT32 offset, void * hCallback);
+ typedef void EnumerateSafePointsCallback (TGcInfoDecoder * decoder, UINT32 offset, void * hCallback);
void EnumerateSafePoints(EnumerateSafePointsCallback * pCallback, void * hCallback);
#endif
@@ -661,7 +664,7 @@ class GcInfoDecoder
bool IsScratchStackSlot(INT32 spOffset, GcStackSlotBase spBase, PREGDISPLAY pRD);
void ReportUntrackedSlots(
- GcSlotDecoder& slotDecoder,
+ GcSlotDecoder<GcInfoEncoding>& slotDecoder,
PREGDISPLAY pRD,
unsigned flags,
GCEnumCallback pCallBack,
@@ -689,7 +692,7 @@ class GcInfoDecoder
inline void ReportSlotToGC(
- GcSlotDecoder& slotDecoder,
+ GcSlotDecoder<GcInfoEncoding>& slotDecoder,
UINT32 slotIndex,
PREGDISPLAY pRD,
bool reportScratchSlots,
@@ -746,6 +749,9 @@ class GcInfoDecoder
}
}
};
+
+typedef TGcInfoDecoder<TargetGcInfoEncoding> GcInfoDecoder;
+
#endif // USE_GC_INFO_DECODER
diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h
index 3777e1b7064bb2..f147d9566e9e87 100644
--- a/src/coreclr/inc/gcinfoencoder.h
+++ b/src/coreclr/inc/gcinfoencoder.h
@@ -315,16 +315,6 @@ class BitStreamWriter
typedef UINT32 GcSlotId;
-inline UINT32 GetNormCodeOffsetChunk(UINT32 normCodeOffset)
-{
- return normCodeOffset / NUM_NORM_CODE_OFFSETS_PER_CHUNK;
-}
-
-inline UINT32 GetCodeOffsetChunk(UINT32 codeOffset)
-{
- return (NORMALIZE_CODE_OFFSET(codeOffset)) / NUM_NORM_CODE_OFFSETS_PER_CHUNK;
-}
-
enum GENERIC_CONTEXTPARAM_TYPE
{
GENERIC_CONTEXTPARAM_NONE = 0,
@@ -335,18 +325,8 @@ enum GENERIC_CONTEXTPARAM_TYPE
extern void DECLSPEC_NORETURN ThrowOutOfMemory();
-class GcInfoEncoder
+namespace GcInfoEncoderExt
{
-public:
- typedef void (*NoMemoryFunction)(void);
-
- GcInfoEncoder(
- ICorJitInfo* pCorJitInfo,
- CORINFO_METHOD_INFO* pMethodInfo,
- IAllocator* pJitAllocator,
- NoMemoryFunction pNoMem = ::ThrowOutOfMemory
- );
-
struct LifetimeTransition
{
UINT32 CodeOffset;
@@ -354,7 +334,20 @@ class GcInfoEncoder
BYTE BecomesLive;
BYTE IsDeleted;
};
+}
+template <typename GcInfoEncoding>
+class TGcInfoEncoder
+{
+public:
+ typedef void (*NoMemoryFunction)(void);
+
+ TGcInfoEncoder(
+ ICorJitInfo* pCorJitInfo,
+ CORINFO_METHOD_INFO* pMethodInfo,
+ IAllocator* pJitAllocator,
+ NoMemoryFunction pNoMem = ::ThrowOutOfMemory
+ );
#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED
void DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites);
@@ -488,7 +481,7 @@ class GcInfoEncoder
BitStreamWriter m_Info2; // Used for chunk encodings
GcInfoArrayList m_InterruptibleRanges;
- GcInfoArrayList m_LifetimeTransitions;
+ GcInfoArrayList m_LifetimeTransitions;
bool m_IsVarArg;
#if defined(TARGET_AMD64)
@@ -548,9 +541,14 @@ class GcInfoEncoder
// new array, and copying the non-removed elements into it. If it does this, sets "*ppTransitions" to
// point to the new array, "*pNumTransitions" to its shorted length, and "*ppEndTransitions" to
// point one beyond the used portion of this array.
- void EliminateRedundantLiveDeadPairs(LifetimeTransition** ppTransitions,
+ void EliminateRedundantLiveDeadPairs(GcInfoEncoderExt::LifetimeTransition** ppTransitions,
size_t* pNumTransitions,
- LifetimeTransition** ppEndTransitions);
+ GcInfoEncoderExt::LifetimeTransition** ppEndTransitions);
+
+ static inline UINT32 GetNormCodeOffsetChunk(UINT32 normCodeOffset)
+ {
+ return normCodeOffset / GcInfoEncoding::NUM_NORM_CODE_OFFSETS_PER_CHUNK;
+ }
#ifdef _DEBUG
bool m_IsSlotTableFrozen;
@@ -561,4 +559,6 @@ class GcInfoEncoder
#endif
};
+typedef TGcInfoEncoder<TargetGcInfoEncoding> GcInfoEncoder;
+
#endif // !__GCINFOENCODER_H__
diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h
index e1f7b517897a3f..36164759e54429 100644
--- a/src/coreclr/inc/gcinfotypes.h
+++ b/src/coreclr/inc/gcinfotypes.h
@@ -5,6 +5,11 @@
#ifndef __GCINFOTYPES_H__
#define __GCINFOTYPES_H__
+// HACK: debugreturn.h breaks constexpr
+#ifdef debug_instrumented_return
+#undef return
+#endif // debug_instrumented_return
+
#ifndef FEATURE_NATIVEAOT
#include "gcinfo.h"
#endif
@@ -14,7 +19,7 @@
#endif // _MSC_VER
// *****************************************************************************
-// WARNING!!!: These values and code are used in the runtime repo and SOS in the
+// WARNING!!!: These values and code are used in the runtime repo and SOS in the
// diagnostics repo. Should updated in a backwards and forwards compatible way.
// See: https://github.com/dotnet/diagnostics/blob/main/src/shared/inc/gcinfotypes.h
// https://github.com/dotnet/runtime/blob/main/src/coreclr/inc/gcinfotypes.h
@@ -612,274 +617,283 @@ void FASTCALL decodeCallPattern(int pattern,
#ifndef TARGET_POINTER_SIZE
#define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
#endif
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64)
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6)
-#define NORMALIZE_STACK_SLOT(x) ((x)>>3)
-#define DENORMALIZE_STACK_SLOT(x) ((x)<<3)
-#define NORMALIZE_CODE_LENGTH(x) (x)
-#define DENORMALIZE_CODE_LENGTH(x) (x)
-// Encode RBP as 0
-#define NORMALIZE_STACK_BASE_REGISTER(x) ((x) ^ 5)
-#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x) ^ 5)
-#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3)
-#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3)
-#define CODE_OFFSETS_NEED_NORMALIZATION 0
-#define NORMALIZE_CODE_OFFSET(x) (x)
-#define DENORMALIZE_CODE_OFFSET(x) (x)
-#define NORMALIZE_REGISTER(x) (x)
-#define DENORMALIZE_REGISTER(x) (x)
-#define NORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define DENORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-
-#define PSP_SYM_STACK_SLOT_ENCBASE 6
-#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6
-#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6
-#define GS_COOKIE_STACK_SLOT_ENCBASE 6
-#define CODE_LENGTH_ENCBASE 8
-#define STACK_BASE_REGISTER_ENCBASE 3
-#define SIZE_OF_STACK_AREA_ENCBASE 3
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4
-#define REVERSE_PINVOKE_FRAME_ENCBASE 6
-#define NUM_REGISTERS_ENCBASE 2
-#define NUM_STACK_SLOTS_ENCBASE 2
-#define NUM_UNTRACKED_SLOTS_ENCBASE 1
-#define NORM_PROLOG_SIZE_ENCBASE 5
-#define NORM_EPILOG_SIZE_ENCBASE 3
-#define NORM_CODE_OFFSET_DELTA_ENCBASE 3
-#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6
-#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6
-#define REGISTER_ENCBASE 3
-#define REGISTER_DELTA_ENCBASE 2
-#define STACK_SLOT_ENCBASE 6
-#define STACK_SLOT_DELTA_ENCBASE 4
-#define NUM_SAFE_POINTS_ENCBASE 2
-#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1
-#define NUM_EH_CLAUSES_ENCBASE 2
-#define POINTER_SIZE_ENCBASE 3
-#define LIVESTATE_RLE_RUN_ENCBASE 2
-#define LIVESTATE_RLE_SKIP_ENCBASE 4
+
+#define TargetGcInfoEncoding AMD64GcInfoEncoding
+
+struct AMD64GcInfoEncoding {
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64);
+
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6);
+ static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); }
+ static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); }
+ static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return (x); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return (x); }
+
+ // Encode RBP as 0
+ static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) ^ 5); }
+ static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) ^ 5); }
+ static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); }
+ static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); }
+ static const bool CODE_OFFSETS_NEED_NORMALIZATION = false;
+ static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return (x); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return (x); }
+
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 6;
+ static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6;
+ static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6;
+ static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6;
+ static const int CODE_LENGTH_ENCBASE = 8;
+ static const int STACK_BASE_REGISTER_ENCBASE = 3;
+ static const int SIZE_OF_STACK_AREA_ENCBASE = 3;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4;
+ static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6;
+ static const int NUM_REGISTERS_ENCBASE = 2;
+ static const int NUM_STACK_SLOTS_ENCBASE = 2;
+ static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1;
+ static const int NORM_PROLOG_SIZE_ENCBASE = 5;
+ static const int NORM_EPILOG_SIZE_ENCBASE = 3;
+ static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3;
+ static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6;
+ static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6;
+ static const int REGISTER_ENCBASE = 3;
+ static const int REGISTER_DELTA_ENCBASE = 2;
+ static const int STACK_SLOT_ENCBASE = 6;
+ static const int STACK_SLOT_DELTA_ENCBASE = 4;
+ static const int NUM_SAFE_POINTS_ENCBASE = 2;
+ static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1;
+ static const int NUM_EH_CLAUSES_ENCBASE = 2;
+ static const int POINTER_SIZE_ENCBASE = 3;
+ static const int LIVESTATE_RLE_RUN_ENCBASE = 2;
+ static const int LIVESTATE_RLE_SKIP_ENCBASE = 4;
+};
#elif defined(TARGET_ARM)
#ifndef TARGET_POINTER_SIZE
#define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target
#endif
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64)
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6)
-#define NORMALIZE_STACK_SLOT(x) ((x)>>2)
-#define DENORMALIZE_STACK_SLOT(x) ((x)<<2)
-#define NORMALIZE_CODE_LENGTH(x) ((x)>>1)
-#define DENORMALIZE_CODE_LENGTH(x) ((x)<<1)
-// Encode R11 as zero
-#define NORMALIZE_STACK_BASE_REGISTER(x) ((((x) - 4) & 7) ^ 7)
-#define DENORMALIZE_STACK_BASE_REGISTER(x) (((x) ^ 7) + 4)
-#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>2)
-#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<2)
-#define CODE_OFFSETS_NEED_NORMALIZATION 1
-#define NORMALIZE_CODE_OFFSET(x) ((x)>>1) // Instructions are 2/4 bytes long in Thumb/ARM states,
-#define DENORMALIZE_CODE_OFFSET(x) ((x)<<1)
-#define NORMALIZE_REGISTER(x) (x)
-#define DENORMALIZE_REGISTER(x) (x)
-#define NORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define DENORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-
-// The choices of these encoding bases only affects space overhead
-// and performance, not semantics/correctness.
-#define PSP_SYM_STACK_SLOT_ENCBASE 5
-#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 5
-#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 5
-#define GS_COOKIE_STACK_SLOT_ENCBASE 5
-#define CODE_LENGTH_ENCBASE 7
-#define STACK_BASE_REGISTER_ENCBASE 1
-#define SIZE_OF_STACK_AREA_ENCBASE 3
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 3
-#define REVERSE_PINVOKE_FRAME_ENCBASE 5
-#define NUM_REGISTERS_ENCBASE 2
-#define NUM_STACK_SLOTS_ENCBASE 3
-#define NUM_UNTRACKED_SLOTS_ENCBASE 3
-#define NORM_PROLOG_SIZE_ENCBASE 5
-#define NORM_EPILOG_SIZE_ENCBASE 3
-#define NORM_CODE_OFFSET_DELTA_ENCBASE 3
-#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 4
-#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6
-#define REGISTER_ENCBASE 2
-#define REGISTER_DELTA_ENCBASE 1
-#define STACK_SLOT_ENCBASE 6
-#define STACK_SLOT_DELTA_ENCBASE 4
-#define NUM_SAFE_POINTS_ENCBASE 3
-#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 2
-#define NUM_EH_CLAUSES_ENCBASE 3
-#define POINTER_SIZE_ENCBASE 3
-#define LIVESTATE_RLE_RUN_ENCBASE 2
-#define LIVESTATE_RLE_SKIP_ENCBASE 4
+
+#define TargetGcInfoEncoding ARM32GcInfoEncoding
+
+struct ARM32GcInfoEncoding {
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64);
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6);
+ static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>2); }
+ static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<2); }
+ static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>1); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<1); }
+ // Encode R11 as zero
+ static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((((x) - 4) & 7) ^ 7); }
+ static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (((x) ^ 7) + 4); }
+ static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<2); }
+ static const bool CODE_OFFSETS_NEED_NORMALIZATION = true;
+ static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>1) /* Instructions are 2/4 bytes long in Thumb/ARM states */; }
+ static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<1); }
+
+ // The choices of these encoding bases only affects space overhead
+ // and performance, not semantics/correctness.
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 5;
+ static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 5;
+ static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 5;
+ static const int GS_COOKIE_STACK_SLOT_ENCBASE = 5;
+ static const int CODE_LENGTH_ENCBASE = 7;
+ static const int STACK_BASE_REGISTER_ENCBASE = 1;
+ static const int SIZE_OF_STACK_AREA_ENCBASE = 3;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 3;
+ static const int REVERSE_PINVOKE_FRAME_ENCBASE = 5;
+ static const int NUM_REGISTERS_ENCBASE = 2;
+ static const int NUM_STACK_SLOTS_ENCBASE = 3;
+ static const int NUM_UNTRACKED_SLOTS_ENCBASE = 3;
+ static const int NORM_PROLOG_SIZE_ENCBASE = 5;
+ static const int NORM_EPILOG_SIZE_ENCBASE = 3;
+ static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3;
+ static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 4;
+ static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6;
+ static const int REGISTER_ENCBASE = 2;
+ static const int REGISTER_DELTA_ENCBASE = 1;
+ static const int STACK_SLOT_ENCBASE = 6;
+ static const int STACK_SLOT_DELTA_ENCBASE = 4;
+ static const int NUM_SAFE_POINTS_ENCBASE = 3;
+ static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 2;
+ static const int NUM_EH_CLAUSES_ENCBASE = 3;
+ static const int POINTER_SIZE_ENCBASE = 3;
+ static const int LIVESTATE_RLE_RUN_ENCBASE = 2;
+ static const int LIVESTATE_RLE_SKIP_ENCBASE = 4;
+};
#elif defined(TARGET_ARM64)
#ifndef TARGET_POINTER_SIZE
#define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
#endif
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64)
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6)
-#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned
-#define DENORMALIZE_STACK_SLOT(x) ((x)<<3)
-#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long
-#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2)
-#define NORMALIZE_STACK_BASE_REGISTER(x) ((x)^29) // Encode Frame pointer X29 as zero
-#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x)^29)
-#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3)
-#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3)
-#define CODE_OFFSETS_NEED_NORMALIZATION 1
-#define NORMALIZE_CODE_OFFSET(x) ((x)>>2) // Instructions are 4 bytes long
-#define DENORMALIZE_CODE_OFFSET(x) ((x)<<2)
-#define NORMALIZE_REGISTER(x) (x)
-#define DENORMALIZE_REGISTER(x) (x)
-#define NORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define DENORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-
-#define PSP_SYM_STACK_SLOT_ENCBASE 6
-#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6
-#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6
-#define GS_COOKIE_STACK_SLOT_ENCBASE 6
-#define CODE_LENGTH_ENCBASE 8
-#define STACK_BASE_REGISTER_ENCBASE 2 // FP encoded as 0, SP as 2.
-#define SIZE_OF_STACK_AREA_ENCBASE 3
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4
-#define SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE 4
-#define REVERSE_PINVOKE_FRAME_ENCBASE 6
-#define NUM_REGISTERS_ENCBASE 3
-#define NUM_STACK_SLOTS_ENCBASE 2
-#define NUM_UNTRACKED_SLOTS_ENCBASE 1
-#define NORM_PROLOG_SIZE_ENCBASE 5
-#define NORM_EPILOG_SIZE_ENCBASE 3
-#define NORM_CODE_OFFSET_DELTA_ENCBASE 3
-#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6
-#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6
-#define REGISTER_ENCBASE 3
-#define REGISTER_DELTA_ENCBASE 2
-#define STACK_SLOT_ENCBASE 6
-#define STACK_SLOT_DELTA_ENCBASE 4
-#define NUM_SAFE_POINTS_ENCBASE 3
-#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1
-#define NUM_EH_CLAUSES_ENCBASE 2
-#define POINTER_SIZE_ENCBASE 3
-#define LIVESTATE_RLE_RUN_ENCBASE 2
-#define LIVESTATE_RLE_SKIP_ENCBASE 4
+
+#define TargetGcInfoEncoding ARM64GcInfoEncoding
+
+struct ARM64GcInfoEncoding {
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64);
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6);
+ // GC Pointers are 8-bytes aligned
+ static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); }
+ static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); }
+ // All Instructions are 4 bytes long
+ static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<2); }
+ // Encode Frame pointer X29 as zero
+ static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x)^29); }
+ static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x)^29); }
+ static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); }
+ static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); }
+ static const bool CODE_OFFSETS_NEED_NORMALIZATION = true;
+ // Instructions are 4 bytes long
+ static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<2); }
+
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 6;
+ static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6;
+ static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6;
+ static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6;
+ static const int CODE_LENGTH_ENCBASE = 8;
+ // FP encoded as 0, SP as 2.
+ static const int STACK_BASE_REGISTER_ENCBASE = 2;
+ static const int SIZE_OF_STACK_AREA_ENCBASE = 3;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE = 4;
+ static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6;
+ static const int NUM_REGISTERS_ENCBASE = 3;
+ static const int NUM_STACK_SLOTS_ENCBASE = 2;
+ static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1;
+ static const int NORM_PROLOG_SIZE_ENCBASE = 5;
+ static const int NORM_EPILOG_SIZE_ENCBASE = 3;
+ static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3;
+ static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6;
+ static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6;
+ static const int REGISTER_ENCBASE = 3;
+ static const int REGISTER_DELTA_ENCBASE = 2;
+ static const int STACK_SLOT_ENCBASE = 6;
+ static const int STACK_SLOT_DELTA_ENCBASE = 4;
+ static const int NUM_SAFE_POINTS_ENCBASE = 3;
+ static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1;
+ static const int NUM_EH_CLAUSES_ENCBASE = 2;
+ static const int POINTER_SIZE_ENCBASE = 3;
+ static const int LIVESTATE_RLE_RUN_ENCBASE = 2;
+ static const int LIVESTATE_RLE_SKIP_ENCBASE = 4;
+};
#elif defined(TARGET_LOONGARCH64)
#ifndef TARGET_POINTER_SIZE
#define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
#endif
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64)
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6)
-#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned
-#define DENORMALIZE_STACK_SLOT(x) ((x)<<3)
-#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long
-#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2)
-#define NORMALIZE_STACK_BASE_REGISTER(x) ((x) == 22 ? 0u : 1u) // Encode Frame pointer fp=$22 as zero
-#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x) == 0 ? 22u : 3u)
-#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3)
-#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3)
-#define CODE_OFFSETS_NEED_NORMALIZATION 1
-#define NORMALIZE_CODE_OFFSET(x) ((x)>>2) // Instructions are 4 bytes long
-#define DENORMALIZE_CODE_OFFSET(x) ((x)<<2)
-#define NORMALIZE_REGISTER(x) (x)
-#define DENORMALIZE_REGISTER(x) (x)
-#define NORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define DENORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-
-#define PSP_SYM_STACK_SLOT_ENCBASE 6
-#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6
-#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6
-#define GS_COOKIE_STACK_SLOT_ENCBASE 6
-#define CODE_LENGTH_ENCBASE 8
-// FP/SP encoded as 0 or 1.
-#define STACK_BASE_REGISTER_ENCBASE 2
-#define SIZE_OF_STACK_AREA_ENCBASE 3
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4
-#define REVERSE_PINVOKE_FRAME_ENCBASE 6
-#define NUM_REGISTERS_ENCBASE 3
-#define NUM_STACK_SLOTS_ENCBASE 2
-#define NUM_UNTRACKED_SLOTS_ENCBASE 1
-#define NORM_PROLOG_SIZE_ENCBASE 5
-#define NORM_EPILOG_SIZE_ENCBASE 3
-#define NORM_CODE_OFFSET_DELTA_ENCBASE 3
-#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6
-#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6
-#define REGISTER_ENCBASE 3
-#define REGISTER_DELTA_ENCBASE 2
-#define STACK_SLOT_ENCBASE 6
-#define STACK_SLOT_DELTA_ENCBASE 4
-#define NUM_SAFE_POINTS_ENCBASE 3
-#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1
-#define NUM_EH_CLAUSES_ENCBASE 2
-#define POINTER_SIZE_ENCBASE 3
-#define LIVESTATE_RLE_RUN_ENCBASE 2
-#define LIVESTATE_RLE_SKIP_ENCBASE 4
+
+#define TargetGcInfoEncoding LoongArch64GcInfoEncoding
+
+struct LoongArch64GcInfoEncoding {
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64);
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6);
+ // GC Pointers are 8-bytes aligned
+ static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); }
+ static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); }
+ // All Instructions are 4 bytes long
+ static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<2); }
+ // Encode Frame pointer fp=$22 as zero
+ static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 22 ? 0u : 1u); }
+ static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 0 ? 22u : 3u); }
+ static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); }
+ static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); }
+ static const bool CODE_OFFSETS_NEED_NORMALIZATION = true;
+ // Instructions are 4 bytes long
+ static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<2); }
+
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 6;
+ static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6;
+ static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6;
+ static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6;
+ static const int CODE_LENGTH_ENCBASE = 8;
+ // FP/SP encoded as 0 or 1.
+ static const int STACK_BASE_REGISTER_ENCBASE = 2;
+ static const int SIZE_OF_STACK_AREA_ENCBASE = 3;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4;
+ static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6;
+ static const int NUM_REGISTERS_ENCBASE = 3;
+ static const int NUM_STACK_SLOTS_ENCBASE = 2;
+ static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1;
+ static const int NORM_PROLOG_SIZE_ENCBASE = 5;
+ static const int NORM_EPILOG_SIZE_ENCBASE = 3;
+ static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3;
+ static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6;
+ static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6;
+ static const int REGISTER_ENCBASE = 3;
+ static const int REGISTER_DELTA_ENCBASE = 2;
+ static const int STACK_SLOT_ENCBASE = 6;
+ static const int STACK_SLOT_DELTA_ENCBASE = 4;
+ static const int NUM_SAFE_POINTS_ENCBASE = 3;
+ static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1;
+ static const int NUM_EH_CLAUSES_ENCBASE = 2;
+ static const int POINTER_SIZE_ENCBASE = 3;
+ static const int LIVESTATE_RLE_RUN_ENCBASE = 2;
+ static const int LIVESTATE_RLE_SKIP_ENCBASE = 4;
+};
#elif defined(TARGET_RISCV64)
#ifndef TARGET_POINTER_SIZE
#define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
#endif
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64)
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6)
-#define NORMALIZE_STACK_SLOT(x) ((x)>>3) // GC Pointers are 8-bytes aligned
-#define DENORMALIZE_STACK_SLOT(x) ((x)<<3)
-#define NORMALIZE_CODE_LENGTH(x) ((x)>>2) // All Instructions are 4 bytes long
-#define DENORMALIZE_CODE_LENGTH(x) ((x)<<2)
-#define NORMALIZE_STACK_BASE_REGISTER(x) ((x) == 8 ? 0u : 1u) // Encode Frame pointer X8 as zero, sp/x2 as 1
-#define DENORMALIZE_STACK_BASE_REGISTER(x) ((x) == 0 ? 8u : 2u)
-#define NORMALIZE_SIZE_OF_STACK_AREA(x) ((x)>>3)
-#define DENORMALIZE_SIZE_OF_STACK_AREA(x) ((x)<<3)
-#define CODE_OFFSETS_NEED_NORMALIZATION 1
-#define NORMALIZE_CODE_OFFSET(x) ((x)>>2) // Instructions are 4 bytes long
-#define DENORMALIZE_CODE_OFFSET(x) ((x)<<2)
-#define NORMALIZE_REGISTER(x) (x)
-#define DENORMALIZE_REGISTER(x) (x)
-#define NORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define DENORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-
-#define PSP_SYM_STACK_SLOT_ENCBASE 6
-#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6
-#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6
-#define GS_COOKIE_STACK_SLOT_ENCBASE 6
-#define CODE_LENGTH_ENCBASE 8
-#define STACK_BASE_REGISTER_ENCBASE 2
-// FP encoded as 0, SP as 1
-#define SIZE_OF_STACK_AREA_ENCBASE 3
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 4
-#define SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE 4
-#define REVERSE_PINVOKE_FRAME_ENCBASE 6
-#define NUM_REGISTERS_ENCBASE 3
-#define NUM_STACK_SLOTS_ENCBASE 2
-#define NUM_UNTRACKED_SLOTS_ENCBASE 1
-#define NORM_PROLOG_SIZE_ENCBASE 5
-#define NORM_EPILOG_SIZE_ENCBASE 3
-#define NORM_CODE_OFFSET_DELTA_ENCBASE 3
-#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 6
-#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 6
-#define REGISTER_ENCBASE 3
-#define REGISTER_DELTA_ENCBASE 2
-#define STACK_SLOT_ENCBASE 6
-#define STACK_SLOT_DELTA_ENCBASE 4
-#define NUM_SAFE_POINTS_ENCBASE 3
-#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1
-#define NUM_EH_CLAUSES_ENCBASE 2
-#define POINTER_SIZE_ENCBASE 3
-#define LIVESTATE_RLE_RUN_ENCBASE 2
-#define LIVESTATE_RLE_SKIP_ENCBASE 4
+#define TargetGcInfoEncoding RISCV64GcInfoEncoding
+
+struct RISCV64GcInfoEncoding {
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64);
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6);
+ // GC Pointers are 8-bytes aligned
+ static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return ((x)>>3); }
+ static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return ((x)<<3); }
+ // All Instructions are 4 bytes long
+ static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return ((x)<<2); }
+ // Encode Frame pointer X8 as zero, sp/x2 as 1
+ static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 8 ? 0u : 1u); }
+ static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return ((x) == 0 ? 8u : 2u); }
+ static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)>>3); }
+ static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return ((x)<<3); }
+ static const bool CODE_OFFSETS_NEED_NORMALIZATION = true;
+ // Instructions are 4 bytes long
+ static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)>>2); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return ((x)<<2); }
+
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 6;
+ static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6;
+ static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6;
+ static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6;
+ static const int CODE_LENGTH_ENCBASE = 8;
+    // FP encoded as 0, SP as 1
+    static const int STACK_BASE_REGISTER_ENCBASE = 2;
+ static const int SIZE_OF_STACK_AREA_ENCBASE = 3;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE = 4;
+ static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6;
+ static const int NUM_REGISTERS_ENCBASE = 3;
+ static const int NUM_STACK_SLOTS_ENCBASE = 2;
+ static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1;
+ static const int NORM_PROLOG_SIZE_ENCBASE = 5;
+ static const int NORM_EPILOG_SIZE_ENCBASE = 3;
+ static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3;
+ static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6;
+ static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6;
+ static const int REGISTER_ENCBASE = 3;
+ static const int REGISTER_DELTA_ENCBASE = 2;
+ static const int STACK_SLOT_ENCBASE = 6;
+ static const int STACK_SLOT_DELTA_ENCBASE = 4;
+ static const int NUM_SAFE_POINTS_ENCBASE = 3;
+ static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1;
+ static const int NUM_EH_CLAUSES_ENCBASE = 2;
+ static const int POINTER_SIZE_ENCBASE = 3;
+ static const int LIVESTATE_RLE_RUN_ENCBASE = 2;
+ static const int LIVESTATE_RLE_SKIP_ENCBASE = 4;
+};
-#else
+#else // defined(TARGET_xxx)
#ifndef TARGET_X86
#ifdef PORTABILITY_WARNING
@@ -890,55 +904,58 @@ PORTABILITY_WARNING("Please specialize these definitions for your platform!")
#ifndef TARGET_POINTER_SIZE
#define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target
#endif
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK (64)
-#define NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 (6)
-#define NORMALIZE_STACK_SLOT(x) (x)
-#define DENORMALIZE_STACK_SLOT(x) (x)
-#define NORMALIZE_CODE_LENGTH(x) (x)
-#define DENORMALIZE_CODE_LENGTH(x) (x)
-#define NORMALIZE_STACK_BASE_REGISTER(x) (x)
-#define DENORMALIZE_STACK_BASE_REGISTER(x) (x)
-#define NORMALIZE_SIZE_OF_STACK_AREA(x) (x)
-#define DENORMALIZE_SIZE_OF_STACK_AREA(x) (x)
-#define CODE_OFFSETS_NEED_NORMALIZATION 0
-#define NORMALIZE_CODE_OFFSET(x) (x)
-#define DENORMALIZE_CODE_OFFSET(x) (x)
-#define NORMALIZE_REGISTER(x) (x)
-#define DENORMALIZE_REGISTER(x) (x)
-#define NORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define DENORMALIZE_NUM_SAFE_POINTS(x) (x)
-#define NORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-#define DENORMALIZE_NUM_INTERRUPTIBLE_RANGES(x) (x)
-
-#define PSP_SYM_STACK_SLOT_ENCBASE 6
-#define GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE 6
-#define SECURITY_OBJECT_STACK_SLOT_ENCBASE 6
-#define GS_COOKIE_STACK_SLOT_ENCBASE 6
-#define CODE_LENGTH_ENCBASE 6
-#define STACK_BASE_REGISTER_ENCBASE 3
-#define SIZE_OF_STACK_AREA_ENCBASE 6
-#define SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE 3
-#define REVERSE_PINVOKE_FRAME_ENCBASE 6
-#define NUM_REGISTERS_ENCBASE 3
-#define NUM_STACK_SLOTS_ENCBASE 5
-#define NUM_UNTRACKED_SLOTS_ENCBASE 5
-#define NORM_PROLOG_SIZE_ENCBASE 4
-#define NORM_EPILOG_SIZE_ENCBASE 3
-#define NORM_CODE_OFFSET_DELTA_ENCBASE 3
-#define INTERRUPTIBLE_RANGE_DELTA1_ENCBASE 5
-#define INTERRUPTIBLE_RANGE_DELTA2_ENCBASE 5
-#define REGISTER_ENCBASE 3
-#define REGISTER_DELTA_ENCBASE REGISTER_ENCBASE
-#define STACK_SLOT_ENCBASE 6
-#define STACK_SLOT_DELTA_ENCBASE 4
-#define NUM_SAFE_POINTS_ENCBASE 4
-#define NUM_INTERRUPTIBLE_RANGES_ENCBASE 1
-#define NUM_EH_CLAUSES_ENCBASE 2
-#define POINTER_SIZE_ENCBASE 3
-#define LIVESTATE_RLE_RUN_ENCBASE 2
-#define LIVESTATE_RLE_SKIP_ENCBASE 4
-#endif
+#define TargetGcInfoEncoding X86GcInfoEncoding
+
+struct X86GcInfoEncoding {
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK = (64);
+ static const uint32_t NUM_NORM_CODE_OFFSETS_PER_CHUNK_LOG2 = (6);
+ static inline constexpr int32_t NORMALIZE_STACK_SLOT (int32_t x) { return (x); }
+ static inline constexpr int32_t DENORMALIZE_STACK_SLOT (int32_t x) { return (x); }
+ static inline constexpr uint32_t NORMALIZE_CODE_LENGTH (uint32_t x) { return (x); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_LENGTH (uint32_t x) { return (x); }
+ static inline constexpr uint32_t NORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (x); }
+ static inline constexpr uint32_t DENORMALIZE_STACK_BASE_REGISTER (uint32_t x) { return (x); }
+ static inline constexpr uint32_t NORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return (x); }
+ static inline constexpr uint32_t DENORMALIZE_SIZE_OF_STACK_AREA (uint32_t x) { return (x); }
+ static const bool CODE_OFFSETS_NEED_NORMALIZATION = false;
+ static inline constexpr uint32_t NORMALIZE_CODE_OFFSET (uint32_t x) { return (x); }
+ static inline constexpr uint32_t DENORMALIZE_CODE_OFFSET (uint32_t x) { return (x); }
+
+ static const int PSP_SYM_STACK_SLOT_ENCBASE = 6;
+ static const int GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE = 6;
+ static const int SECURITY_OBJECT_STACK_SLOT_ENCBASE = 6;
+ static const int GS_COOKIE_STACK_SLOT_ENCBASE = 6;
+ static const int CODE_LENGTH_ENCBASE = 6;
+ static const int STACK_BASE_REGISTER_ENCBASE = 3;
+ static const int SIZE_OF_STACK_AREA_ENCBASE = 6;
+ static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 3;
+ static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6;
+ static const int NUM_REGISTERS_ENCBASE = 3;
+ static const int NUM_STACK_SLOTS_ENCBASE = 5;
+ static const int NUM_UNTRACKED_SLOTS_ENCBASE = 5;
+ static const int NORM_PROLOG_SIZE_ENCBASE = 4;
+ static const int NORM_EPILOG_SIZE_ENCBASE = 3;
+ static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3;
+ static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 5;
+ static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 5;
+ static const int REGISTER_ENCBASE = 3;
+ static const int REGISTER_DELTA_ENCBASE = REGISTER_ENCBASE;
+ static const int STACK_SLOT_ENCBASE = 6;
+ static const int STACK_SLOT_DELTA_ENCBASE = 4;
+ static const int NUM_SAFE_POINTS_ENCBASE = 4;
+ static const int NUM_INTERRUPTIBLE_RANGES_ENCBASE = 1;
+ static const int NUM_EH_CLAUSES_ENCBASE = 2;
+ static const int POINTER_SIZE_ENCBASE = 3;
+ static const int LIVESTATE_RLE_RUN_ENCBASE = 2;
+ static const int LIVESTATE_RLE_SKIP_ENCBASE = 4;
+};
+
+#endif // defined(TARGET_xxx)
+
+#ifdef debug_instrumented_return
+#define return debug_instrumented_return
+#endif // debug_instrumented_return
#endif // !__GCINFOTYPES_H__
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index ca025f62a7ec4d..c62b79a09b3976 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -37,11 +37,11 @@
#include
-constexpr GUID JITEEVersionIdentifier = { /* 4463d6ac-dfcb-4ab0-a941-c53b56089b7c */
- 0x4463d6ac,
- 0xdfcb,
- 0x4ab0,
- {0xa9, 0x41, 0xc5, 0x3b, 0x56, 0x08, 0x9b, 0x7c}
+constexpr GUID JITEEVersionIdentifier = { /* 78acb599-d9be-4ea1-8e93-546ec43e0487 */
+ 0x78acb599,
+ 0xd9be,
+ 0x4ea1,
+ {0x8e, 0x93, 0x54, 0x6e, 0xc4, 0x3e, 0x04, 0x87}
};
#endif // JIT_EE_VERSIONING_GUID_H
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index 38114a9bbfcada..72748e42cbde03 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -92,7 +92,7 @@
DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1,METHOD__NIL)
DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_ALIGN8, JIT_NewArr1,METHOD__NIL)
- JITHELPER(CORINFO_HELP_STRCNS, JIT_StrCns, METHOD__NIL)
+ DYNAMICJITHELPER(CORINFO_HELP_STRCNS, NULL, METHOD__STRING__STRCNS)
// Object model
DYNAMICJITHELPER(CORINFO_HELP_INITCLASS, NULL, METHOD__INITHELPERS__INITCLASS)
@@ -150,7 +150,7 @@
// GC support
DYNAMICJITHELPER(CORINFO_HELP_STOP_FOR_GC, JIT_RareDisableHelper, METHOD__NIL)
DYNAMICJITHELPER(CORINFO_HELP_POLL_GC, JIT_PollGC, METHOD__THREAD__POLLGC)
-
+
JITHELPER(CORINFO_HELP_CHECK_OBJ, JIT_CheckObj, METHOD__NIL)
// GC Write barrier support
diff --git a/src/coreclr/inc/longfilepathwrappers.h b/src/coreclr/inc/longfilepathwrappers.h
index 6407680900dc14..82046987ee6e48 100644
--- a/src/coreclr/inc/longfilepathwrappers.h
+++ b/src/coreclr/inc/longfilepathwrappers.h
@@ -25,11 +25,6 @@ CreateFileWrapper(
_In_opt_ HANDLE hTemplateFile
);
-DWORD
-GetFileAttributesWrapper(
- _In_ LPCWSTR lpFileName
- );
-
BOOL
GetFileAttributesExWrapper(
_In_ LPCWSTR lpFileName,
diff --git a/src/coreclr/inc/palclr.h b/src/coreclr/inc/palclr.h
index c5628a1b9eee6e..410c0a7c06d12c 100644
--- a/src/coreclr/inc/palclr.h
+++ b/src/coreclr/inc/palclr.h
@@ -48,8 +48,6 @@
#endif // !_MSC_VER
#endif // !NOINLINE
-#define ANALYZER_NORETURN
-
#ifdef _MSC_VER
#define EMPTY_BASES_DECL __declspec(empty_bases)
#else
diff --git a/src/coreclr/inc/pedecoder.h b/src/coreclr/inc/pedecoder.h
index 057dfa9a25de61..6b13de957bfb93 100644
--- a/src/coreclr/inc/pedecoder.h
+++ b/src/coreclr/inc/pedecoder.h
@@ -89,6 +89,8 @@ inline CHECK CheckOverflow(RVA value1, COUNT_T value2)
#define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_UNKNOWN
#elif defined(TARGET_RISCV64)
#define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_RISCV64
+#elif defined(TARGET_WASM)
+#define IMAGE_FILE_MACHINE_NATIVE IMAGE_FILE_MACHINE_UNKNOWN
#else
#error "port me"
#endif
diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h
index 07d3f1f6d5e057..3aa4be6fd9e60c 100644
--- a/src/coreclr/inc/regdisp.h
+++ b/src/coreclr/inc/regdisp.h
@@ -345,6 +345,25 @@ inline TADDR GetRegdisplayStackMark(REGDISPLAY *display) {
return GetSP(display->pCallerContext);
}
+#elif defined(TARGET_WASM)
+struct REGDISPLAY : public REGDISPLAY_BASE {
+ REGDISPLAY()
+ {
+ // Initialize
+ memset(this, 0, sizeof(REGDISPLAY));
+ }
+};
+
+inline void SyncRegDisplayToCurrentContext(REGDISPLAY* pRD)
+{
+}
+
+// This function tells us if the given stack pointer is in one of the frames of the functions called by the given frame
+inline BOOL IsInCalleesFrames(REGDISPLAY *display, LPVOID stackPointer) {
+    _ASSERTE(!"IsInCalleesFrames is not implemented on wasm");
+ return FALSE;
+}
+
#else // none of the above processors
#error "RegDisplay functions are not implemented on this platform."
#endif
diff --git a/src/coreclr/inc/switches.h b/src/coreclr/inc/switches.h
index 06fdaa5f397510..9534511ef2a99d 100644
--- a/src/coreclr/inc/switches.h
+++ b/src/coreclr/inc/switches.h
@@ -43,7 +43,7 @@
#define GC_STATS
#endif
-#if defined(TARGET_X86) || defined(TARGET_ARM)
+#if defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_BROWSER)
#define USE_LAZY_PREFERRED_RANGE 0
#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_LOONGARCH64) || defined(TARGET_POWERPC64) || defined(TARGET_RISCV64)
diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h
index 00fe5b70647e7d..217db1de7b6fa3 100644
--- a/src/coreclr/inc/targetosarch.h
+++ b/src/coreclr/inc/targetosarch.h
@@ -95,6 +95,14 @@ class TargetArchitecture
static const bool IsArmArch = false;
static const bool IsLoongArch64 = false;
static const bool IsRiscV64 = true;
+#elif defined(TARGET_WASM)
+ static const bool IsX86 = false;
+ static const bool IsX64 = false;
+ static const bool IsArm64 = false;
+ static const bool IsArm32 = false;
+ static const bool IsArmArch = false;
+ static const bool IsLoongArch64 = false;
+ static const bool IsRiscV64 = false;
#else
#error Unknown architecture
#endif
diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h
index 9b8e1ee3dfd704..0221afafe08c9b 100644
--- a/src/coreclr/inc/utilcode.h
+++ b/src/coreclr/inc/utilcode.h
@@ -37,6 +37,7 @@ using std::nothrow;
#include
#include
+#include
#include
#include "clrnt.h"
diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp
index e5d3648306bf23..a301e5d5119d39 100644
--- a/src/coreclr/interpreter/compiler.cpp
+++ b/src/coreclr/interpreter/compiler.cpp
@@ -2,8 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
#include "interpreter.h"
-#include "openum.h"
-
static const StackType g_stackTypeFromInterpType[] =
{
StackTypeI4, // I1
@@ -15,21 +13,24 @@ static const StackType g_stackTypeFromInterpType[] =
StackTypeR4, // R4
StackTypeR8, // R8
StackTypeO, // O
- StackTypeVT // VT
+ StackTypeVT, // VT
+ StackTypeByRef, // ByRef
};
static const InterpType g_interpTypeFromStackType[] =
{
- InterpTypeI4, // I4,
- InterpTypeI8, // I8,
- InterpTypeR4, // R4,
- InterpTypeR8, // R8,
- InterpTypeO, // O,
- InterpTypeVT, // VT,
- InterpTypeI, // MP,
- InterpTypeI, // F
+ InterpTypeI4, // I4,
+ InterpTypeI8, // I8,
+ InterpTypeR4, // R4,
+ InterpTypeR8, // R8,
+ InterpTypeO, // O,
+ InterpTypeVT, // VT,
+ InterpTypeByRef, // MP,
+ InterpTypeI, // F
};
+static const char *g_stackTypeString[] = { "I4", "I8", "R4", "R8", "O ", "VT", "MP", "F " };
+
// FIXME Use specific allocators for their intended purpose
// Allocator for data that is kept alive throughout application execution,
// being freed only if the associated method gets freed.
@@ -45,6 +46,13 @@ void* InterpCompiler::AllocMemPool(size_t numBytes)
return malloc(numBytes);
}
+void* InterpCompiler::AllocMemPool0(size_t numBytes)
+{
+ void *ptr = AllocMemPool(numBytes);
+ memset(ptr, 0, numBytes);
+ return ptr;
+}
+
// Allocator for potentially larger chunks of data, that we might want to free
// eagerly, before method is finished compiling, to prevent excessive memory usage.
void* InterpCompiler::AllocTemporary(size_t numBytes)
@@ -52,6 +60,13 @@ void* InterpCompiler::AllocTemporary(size_t numBytes)
return malloc(numBytes);
}
+void* InterpCompiler::AllocTemporary0(size_t numBytes)
+{
+ void *ptr = AllocTemporary(numBytes);
+ memset(ptr, 0, numBytes);
+ return ptr;
+}
+
void* InterpCompiler::ReallocTemporary(void* ptr, size_t numBytes)
{
return realloc(ptr, numBytes);
@@ -94,7 +109,7 @@ InterpInst* InterpCompiler::NewIns(int opcode, int dataLen)
InterpInst *ins = (InterpInst*)AllocMemPool(insSize);
memset(ins, 0, insSize);
ins->opcode = opcode;
- ins->ilOffset = -1;
+ ins->ilOffset = m_currentILOffset;
m_pLastIns = ins;
return ins;
}
@@ -172,7 +187,14 @@ bool InterpCompiler::InsIsNop(InterpInst *ins)
int32_t InterpCompiler::GetInsLength(InterpInst *ins)
{
- return g_interpOpLen[ins->opcode];
+ int len = g_interpOpLen[ins->opcode];
+ if (len == 0)
+ {
+ assert(ins->opcode == INTOP_SWITCH);
+ len = 3 + ins->data[0];
+ }
+
+ return len;
}
void InterpCompiler::ForEachInsSVar(InterpInst *ins, void *pData, void (InterpCompiler::*callback)(int*, void*))
@@ -186,7 +208,7 @@ void InterpCompiler::ForEachInsSVar(InterpInst *ins, void *pData, void (InterpCo
{
if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs) {
int *callArgs = ins->info.pCallInfo->pCallArgs;
- while (*callArgs != -1)
+ while (*callArgs != CALL_ARGS_TERMINATOR)
{
(this->*callback) (callArgs, pData);
callArgs++;
@@ -210,14 +232,12 @@ void InterpCompiler::ForEachInsVar(InterpInst *ins, void *pData, void (InterpCom
}
-InterpBasicBlock* InterpCompiler::AllocBB()
+InterpBasicBlock* InterpCompiler::AllocBB(int32_t ilOffset)
{
InterpBasicBlock *bb = (InterpBasicBlock*)AllocMemPool(sizeof(InterpBasicBlock));
- memset(bb, 0, sizeof(InterpBasicBlock));
- bb->ilOffset = -1;
- bb->nativeOffset = -1;
- bb->stackHeight = -1;
- bb->index = m_BBCount++;
+
+ new (bb) InterpBasicBlock (m_BBCount, ilOffset);
+ m_BBCount++;
return bb;
}
@@ -227,9 +247,8 @@ InterpBasicBlock* InterpCompiler::GetBB(int32_t ilOffset)
if (!bb)
{
- bb = AllocBB ();
+ bb = AllocBB(ilOffset);
- bb->ilOffset = ilOffset;
m_ppOffsetToBB[ilOffset] = bb;
}
@@ -339,7 +358,103 @@ void InterpCompiler::UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to)
to->inCount--;
}
-int32_t InterpCompiler::CreateVarExplicit(InterpType mt, CORINFO_CLASS_HANDLE clsHnd, int size)
+// These are moves between vars, operating only on the interpreter stack
+int32_t InterpCompiler::InterpGetMovForType(InterpType interpType, bool signExtend)
+{
+ switch (interpType)
+ {
+ case InterpTypeI1:
+ case InterpTypeU1:
+ case InterpTypeI2:
+ case InterpTypeU2:
+ if (signExtend)
+ return INTOP_MOV_I4_I1 + interpType;
+ else
+ return INTOP_MOV_4;
+ case InterpTypeI4:
+ case InterpTypeR4:
+ return INTOP_MOV_4;
+ case InterpTypeI8:
+ case InterpTypeR8:
+ return INTOP_MOV_8;
+ case InterpTypeO:
+ case InterpTypeByRef:
+ return INTOP_MOV_P;
+ case InterpTypeVT:
+ return INTOP_MOV_VT;
+ default:
+ assert(0);
+ }
+ return -1;
+}
+
+// This method needs to be called when the current basic block ends and execution can
+// continue into pTargetBB. When the stack state of a basic block is initialized, the vars
+// associated with the stack state are set. When another bblock will continue execution
+// into this bblock, it will first have to emit moves from the vars in its stack state
+// to the vars of the target bblock stack state.
+void InterpCompiler::EmitBBEndVarMoves(InterpBasicBlock *pTargetBB)
+{
+ if (pTargetBB->stackHeight <= 0)
+ return;
+
+ for (int i = 0; i < pTargetBB->stackHeight; i++)
+ {
+ int sVar = m_pStackPointer[i].var;
+ int dVar = pTargetBB->pStackState[i].var;
+ if (sVar != dVar)
+ {
+ InterpType interpType = m_pVars[sVar].interpType;
+ int32_t movOp = InterpGetMovForType(interpType, false);
+
+ AddIns(movOp);
+ m_pLastIns->SetSVar(m_pStackPointer[i].var);
+ m_pLastIns->SetDVar(pTargetBB->pStackState[i].var);
+
+ if (interpType == InterpTypeVT)
+ {
+ assert(m_pVars[sVar].size == m_pVars[dVar].size);
+ m_pLastIns->data[0] = m_pVars[sVar].size;
+ }
+ }
+ }
+}
+
+static void MergeStackTypeInfo(StackInfo *pState1, StackInfo *pState2, int len)
+{
+ // Discard type information if we have type conflicts for stack contents
+ for (int i = 0; i < len; i++)
+ {
+ if (pState1[i].clsHnd != pState2[i].clsHnd)
+ {
+ pState1[i].clsHnd = NULL;
+ pState2[i].clsHnd = NULL;
+ }
+ }
+}
+
+// Initializes stack state at entry to bb, based on the current stack state
+void InterpCompiler::InitBBStackState(InterpBasicBlock *pBB)
+{
+ if (pBB->stackHeight >= 0)
+ {
+ // Already initialized, update stack information
+ MergeStackTypeInfo(m_pStackBase, pBB->pStackState, pBB->stackHeight);
+ }
+ else
+ {
+ pBB->stackHeight = (int32_t)(m_pStackPointer - m_pStackBase);
+ if (pBB->stackHeight > 0)
+ {
+ int size = pBB->stackHeight * sizeof (StackInfo);
+ pBB->pStackState = (StackInfo*)AllocMemPool(size);
+ memcpy (pBB->pStackState, m_pStackBase, size);
+ }
+ }
+}
+
+
+int32_t InterpCompiler::CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size)
{
if (m_varsSize == m_varsCapacity) {
m_varsCapacity *= 2;
@@ -349,12 +464,7 @@ int32_t InterpCompiler::CreateVarExplicit(InterpType mt, CORINFO_CLASS_HANDLE cl
}
InterpVar *var = &m_pVars[m_varsSize];
- var->mt = mt;
- var->clsHnd = clsHnd;
- var->size = size;
- var->indirects = 0;
- var->offset = -1;
- var->liveStart = -1;
+ new (var) InterpVar(interpType, clsHnd, size);
m_varsSize++;
return m_varsSize - 1;
@@ -375,7 +485,25 @@ void InterpCompiler::EnsureStack(int additional)
do \
{ \
if (!CheckStackHelper (n)) \
- goto exit; \
+ goto exit_bad_code; \
+ } while (0)
+
+#define CHECK_STACK_RET_VOID(n) \
+ do { \
+ if (!CheckStackHelper(n)) \
+ return; \
+ } while (0)
+
+#define CHECK_STACK_RET(n, ret) \
+ do { \
+ if (!CheckStackHelper(n)) \
+ return ret; \
+ } while (0)
+
+#define INVALID_CODE_RET_VOID \
+ do { \
+ m_hasInvalidCode = true; \
+ return; \
} while (0)
bool InterpCompiler::CheckStackHelper(int n)
@@ -400,12 +528,17 @@ void InterpCompiler::PushTypeExplicit(StackType stackType, CORINFO_CLASS_HANDLE
m_pStackPointer++;
}
-void InterpCompiler::PushType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd)
+void InterpCompiler::PushStackType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd)
{
// We don't really care about the exact size for non-valuetypes
PushTypeExplicit(stackType, clsHnd, INTERP_STACK_SLOT_SIZE);
}
+void InterpCompiler::PushInterpType(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd)
+{
+ PushStackType(g_stackTypeFromInterpType[interpType], clsHnd);
+}
+
void InterpCompiler::PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size)
{
PushTypeExplicit(StackTypeVT, clsHnd, size);
@@ -426,42 +559,109 @@ int32_t InterpCompiler::ComputeCodeSize()
return codeSize;
}
-int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins)
+int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray *relocs)
{
int32_t opcode = ins->opcode;
int32_t *startIp = ip;
*ip++ = opcode;
- if (g_interpOpDVars[opcode])
- *ip++ = m_pVars[ins->dVar].offset;
-
- if (g_interpOpSVars[opcode])
+ if (opcode == INTOP_SWITCH)
+ {
+ int32_t numLabels = ins->data [0];
+ *ip++ = m_pVars[ins->sVars[0]].offset;
+ *ip++ = numLabels;
+ // Add relocation for each label
+ for (int32_t i = 0; i < numLabels; i++)
+ {
+ Reloc *reloc = (Reloc*)AllocMemPool(sizeof(Reloc));
+ new (reloc) Reloc(RelocSwitch, (int32_t)(ip - m_pMethodCode), ins->info.ppTargetBBTable[i], 0);
+ relocs->Add(reloc);
+ *ip++ = (int32_t)0xdeadbeef;
+ }
+ }
+ else if (InterpOpIsUncondBranch(opcode) || InterpOpIsCondBranch(opcode))
{
+ int32_t brBaseOffset = (int32_t)(startIp - m_pMethodCode);
for (int i = 0; i < g_interpOpSVars[opcode]; i++)
+ *ip++ = m_pVars[ins->sVars[i]].offset;
+
+ if (ins->info.pTargetBB->nativeOffset >= 0)
{
- if (ins->sVars[i] == CALL_ARGS_SVAR)
- {
- *ip++ = m_paramAreaOffset + ins->info.pCallInfo->callOffset;
- }
- else
+ *ip++ = ins->info.pTargetBB->nativeOffset - brBaseOffset;
+ }
+ else if (opcode == INTOP_BR && ins->info.pTargetBB == m_pCBB->pNextBB)
+ {
+ // Ignore branch to the next basic block. Revert the added INTOP_BR.
+ ip--;
+ }
+ else
+ {
+ // We don't know yet the IR offset of the target, add a reloc instead
+ Reloc *reloc = (Reloc*)AllocMemPool(sizeof(Reloc));
+ new (reloc) Reloc(RelocLongBranch, brBaseOffset, ins->info.pTargetBB, g_interpOpSVars[opcode]);
+ relocs->Add(reloc);
+ *ip++ = (int32_t)0xdeadbeef;
+ }
+ }
+ else
+ {
+ // Default code emit for an instruction. The opcode was already emitted above.
+ // We emit the offset for the instruction destination, then for every single source
+ // variable we emit another offset. Finally, we will emit any additional data needed
+ // by the instruction.
+ if (g_interpOpDVars[opcode])
+ *ip++ = m_pVars[ins->dVar].offset;
+
+ if (g_interpOpSVars[opcode])
+ {
+ for (int i = 0; i < g_interpOpSVars[opcode]; i++)
{
- *ip++ = m_pVars[ins->sVars[i]].offset;
+ if (ins->sVars[i] == CALL_ARGS_SVAR)
+ {
+ *ip++ = m_paramAreaOffset + ins->info.pCallInfo->callOffset;
+ }
+ else
+ {
+ *ip++ = m_pVars[ins->sVars[i]].offset;
+ }
}
}
- }
- int left = GetInsLength(ins) - (int32_t)(ip - startIp);
- // Emit the rest of the data
- for (int i = 0; i < left; i++)
- *ip++ = ins->data[i];
+ int left = GetInsLength(ins) - (int32_t)(ip - startIp);
+ // Emit the rest of the data
+ for (int i = 0; i < left; i++)
+ *ip++ = ins->data[i];
+ }
return ip;
}
+void InterpCompiler::PatchRelocations(TArray *relocs)
+{
+ int32_t size = relocs->GetSize();
+
+ for (int32_t i = 0; i < size; i++)
+ {
+ Reloc *reloc = relocs->Get(i);
+ int32_t offset = reloc->pTargetBB->nativeOffset - reloc->offset;
+ int32_t *pSlot = NULL;
+
+ if (reloc->type == RelocLongBranch)
+ pSlot = m_pMethodCode + reloc->offset + reloc->skip + 1;
+ else if (reloc->type == RelocSwitch)
+ pSlot = m_pMethodCode + reloc->offset;
+ else
+ assert(0);
+
+ assert(*pSlot == (int32_t)0xdeadbeef);
+ *pSlot = offset;
+ }
+}
void InterpCompiler::EmitCode()
{
+ TArray relocs;
int32_t codeSize = ComputeCodeSize();
m_pMethodCode = (int32_t*)AllocMethodData(codeSize * sizeof(int32_t));
@@ -469,117 +669,1833 @@ void InterpCompiler::EmitCode()
for (InterpBasicBlock *bb = m_pEntryBB; bb != NULL; bb = bb->pNextBB)
{
bb->nativeOffset = (int32_t)(ip - m_pMethodCode);
+ m_pCBB = bb;
for (InterpInst *ins = bb->pFirstIns; ins != NULL; ins = ins->pNext)
{
- ip = EmitCodeIns(ip, ins);
+ ip = EmitCodeIns(ip, ins, &relocs);
}
}
- m_MethodCodeSize = (int32_t)(ip - m_pMethodCode);
+ m_methodCodeSize = (int32_t)(ip - m_pMethodCode);
+
+ PatchRelocations(&relocs);
}
InterpMethod* InterpCompiler::CreateInterpMethod()
{
- InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize);
+ int numDataItems = m_dataItems.GetSize();
+ void **pDataItems = (void**)AllocMethodData(numDataItems * sizeof(void*));
+
+ for (int i = 0; i < numDataItems; i++)
+ pDataItems[i] = m_dataItems.Get(i);
+
+ InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize, pDataItems);
return pMethod;
}
int32_t* InterpCompiler::GetCode(int32_t *pCodeSize)
{
- *pCodeSize = m_MethodCodeSize;
+ *pCodeSize = m_methodCodeSize;
return m_pMethodCode;
}
InterpCompiler::InterpCompiler(COMP_HANDLE compHnd,
- CORINFO_METHOD_INFO* methodInfo)
+ CORINFO_METHOD_INFO* methodInfo,
+ bool verbose)
{
m_methodHnd = methodInfo->ftn;
+ m_compScopeHnd = methodInfo->scope;
m_compHnd = compHnd;
m_methodInfo = methodInfo;
+ m_verbose = verbose;
}
InterpMethod* InterpCompiler::CompileMethod()
{
+#ifdef DEBUG
+ if (m_verbose)
+ {
+ printf("Interpreter compile method ");
+ PrintMethodName(m_methodHnd);
+ printf("\n");
+ }
+#endif
+
+ CreateILVars();
+
GenerateCode(m_methodInfo);
+#ifdef DEBUG
+ if (m_verbose)
+ {
+ printf("\nUnoptimized IR:\n");
+ PrintCode();
+ }
+#endif
+
AllocOffsets();
EmitCode();
+#ifdef DEBUG
+ if (m_verbose)
+ {
+ printf("\nCompiled method: ");
+ PrintMethodName(m_methodHnd);
+ printf("\nLocals size %d\n", m_totalVarsStackSize);
+ PrintCompiledCode();
+ printf("\n");
+ }
+#endif
+
return CreateInterpMethod();
}
-int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
+// Adds a conversion instruction for the value pointed to by sp, also updating the stack information
+void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp)
{
- uint8_t *ip = methodInfo->ILCode;
- uint8_t *codeEnd = ip + methodInfo->ILCodeSize;
+ InterpInst *newInst;
+ if (prevIns)
+ newInst = InsertIns(prevIns, convOp);
+ else
+ newInst = AddIns(convOp);
- m_ppOffsetToBB = (InterpBasicBlock**)AllocMemPool(sizeof(InterpBasicBlock*) * (methodInfo->ILCodeSize + 1));
- m_stackCapacity = methodInfo->maxStack + 1;
- m_pStackBase = m_pStackPointer = (StackInfo*)AllocTemporary(sizeof(StackInfo) * m_stackCapacity);
+ newInst->SetSVar(sp->var);
+ new (sp) StackInfo(type);
+ int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE);
+ sp->var = var;
+ newInst->SetDVar(var);
+}
+
+static InterpType GetInterpType(CorInfoType corInfoType)
+{
+ switch (corInfoType)
+ {
+ case CORINFO_TYPE_BYTE:
+ return InterpTypeI1;
+ case CORINFO_TYPE_UBYTE:
+ case CORINFO_TYPE_BOOL:
+ return InterpTypeU1;
+ case CORINFO_TYPE_CHAR:
+ case CORINFO_TYPE_USHORT:
+ return InterpTypeU2;
+ case CORINFO_TYPE_SHORT:
+ return InterpTypeI2;
+ case CORINFO_TYPE_INT:
+ case CORINFO_TYPE_UINT:
+ return InterpTypeI4;
+ case CORINFO_TYPE_LONG:
+ case CORINFO_TYPE_ULONG:
+ return InterpTypeI8;
+ case CORINFO_TYPE_NATIVEINT:
+ case CORINFO_TYPE_NATIVEUINT:
+ return InterpTypeI;
+ case CORINFO_TYPE_FLOAT:
+ return InterpTypeR4;
+ case CORINFO_TYPE_DOUBLE:
+ return InterpTypeR8;
+ case CORINFO_TYPE_STRING:
+ case CORINFO_TYPE_CLASS:
+ return InterpTypeO;
+ case CORINFO_TYPE_PTR:
+ return InterpTypeI;
+ case CORINFO_TYPE_BYREF:
+ return InterpTypeByRef;
+ case CORINFO_TYPE_VALUECLASS:
+ case CORINFO_TYPE_REFANY:
+ return InterpTypeVT;
+ case CORINFO_TYPE_VOID:
+ return InterpTypeVoid;
+ default:
+ assert(0);
+ break;
+ }
+ return InterpTypeVoid;
+}
+
+int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign)
+{
+ int32_t size, align;
+ if (interpType == InterpTypeVT)
+ {
+ size = m_compHnd->getClassSize(clsHnd);
+ align = m_compHnd->getClassAlignmentRequirement(clsHnd);
+
+ assert(align <= INTERP_STACK_ALIGNMENT);
+
+ // All vars are stored at 8 byte aligned offsets
+ if (align < INTERP_STACK_SLOT_SIZE)
+ align = INTERP_STACK_SLOT_SIZE;
+ }
+ else
+ {
+ size = INTERP_STACK_SLOT_SIZE; // not the real type size; every non-VT var occupies one full stack slot
+ align = INTERP_STACK_SLOT_SIZE;
+ }
+ *pAlign = align;
+ return size;
+}
+
+
+void InterpCompiler::CreateILVars()
+{
+ bool hasThis = m_methodInfo->args.hasThis();
+ int32_t offset, size, align;
+ int numArgs = hasThis + m_methodInfo->args.numArgs;
+ int numILLocals = m_methodInfo->locals.numArgs;
+ int numILVars = numArgs + numILLocals;
+
+ // add some starting extra space for new vars
+ m_varsCapacity = numILVars + 64;
+ m_pVars = (InterpVar*)AllocTemporary0(m_varsCapacity * sizeof (InterpVar));
+ m_varsSize = numILVars;
+
+ offset = 0;
+
+ INTERP_DUMP("\nCreate IL Vars:\n");
+
+ CORINFO_ARG_LIST_HANDLE sigArg = m_methodInfo->args.args;
+ for (int i = 0; i < numArgs; i++) {
+ InterpType interpType;
+ CORINFO_CLASS_HANDLE argClass;
+ if (hasThis && i == 0)
+ {
+ argClass = m_compHnd->getMethodClass(m_methodInfo->ftn);
+ if (m_compHnd->isValueClass(argClass))
+ interpType = InterpTypeByRef;
+ else
+ interpType = InterpTypeO;
+ }
+ else
+ {
+ CorInfoType argCorType;
+ argCorType = strip(m_compHnd->getArgType(&m_methodInfo->args, sigArg, &argClass));
+ interpType = GetInterpType(argCorType);
+ sigArg = m_compHnd->getArgNext(sigArg);
+ }
+ size = GetInterpTypeStackSize(argClass, interpType, &align);
+
+ new (&m_pVars[i]) InterpVar(interpType, argClass, size);
+
+ m_pVars[i].global = true;
+ m_pVars[i].ILGlobal = true;
+ m_pVars[i].size = size;
+ offset = ALIGN_UP_TO(offset, align);
+ m_pVars[i].offset = offset;
+ INTERP_DUMP("alloc arg var %d to offset %d\n", i, offset);
+ offset += size;
+ }
+
+ offset = ALIGN_UP_TO(offset, INTERP_STACK_ALIGNMENT);
+
+ sigArg = m_methodInfo->locals.args;
+ m_ILLocalsOffset = offset;
+ for (int i = 0; i < numILLocals; i++) {
+ int index = numArgs + i;
+ InterpType interpType;
+ CORINFO_CLASS_HANDLE argClass;
+
+ CorInfoType argCorType = strip(m_compHnd->getArgType(&m_methodInfo->locals, sigArg, &argClass));
+ interpType = GetInterpType(argCorType);
+ size = GetInterpTypeStackSize(argClass, interpType, &align);
+
+ new (&m_pVars[index]) InterpVar(interpType, argClass, size);
+
+ m_pVars[index].global = true;
+ m_pVars[index].ILGlobal = true;
+ offset = ALIGN_UP_TO(offset, align);
+ m_pVars[index].offset = offset;
+ INTERP_DUMP("alloc local var %d to offset %d\n", index, offset);
+ offset += size;
+ sigArg = m_compHnd->getArgNext(sigArg);
+ }
+ offset = ALIGN_UP_TO(offset, INTERP_STACK_ALIGNMENT);
+
+ m_ILLocalsSize = offset - m_ILLocalsOffset;
+ m_totalVarsStackSize = offset;
+}
- m_pCBB = m_pEntryBB = AllocBB();
+bool InterpCompiler::CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo)
+{
+ int32_t codeSize = methodInfo->ILCodeSize;
+ uint8_t *codeStart = methodInfo->ILCode;
+ uint8_t *codeEnd = codeStart + codeSize;
+ const uint8_t *ip = codeStart;
+
+ m_ppOffsetToBB = (InterpBasicBlock**)AllocMemPool0(sizeof(InterpBasicBlock*) * (methodInfo->ILCodeSize + 1));
+ GetBB(0);
+
+ for (unsigned int i = 0; i < methodInfo->EHcount; i++)
+ {
+ CORINFO_EH_CLAUSE clause;
+ m_compHnd->getEHinfo(methodInfo->ftn, i, &clause);
+
+ if ((codeStart + clause.TryOffset) > codeEnd ||
+ (codeStart + clause.TryOffset + clause.TryLength) > codeEnd)
+ {
+ return false;
+ }
+ GetBB(clause.TryOffset);
+
+ if ((codeStart + clause.HandlerOffset) > codeEnd ||
+ (codeStart + clause.HandlerOffset + clause.HandlerLength) > codeEnd)
+ {
+ return false;
+ }
+ GetBB(clause.HandlerOffset);
+
+ if (clause.Flags == CORINFO_EH_CLAUSE_FILTER)
+ {
+ if ((codeStart + clause.FilterOffset) > codeEnd)
+ return false;
+ GetBB(clause.FilterOffset);
+ }
+ }
while (ip < codeEnd)
{
- uint8_t opcode = *ip;
- switch (opcode)
+ int32_t insOffset = (int32_t)(ip - codeStart);
+ OPCODE opcode = CEEDecodeOpcode(&ip);
+ OPCODE_FORMAT opArgs = g_CEEOpArgs[opcode];
+ int32_t target;
+
+ switch (opArgs)
{
- case CEE_NOP:
- ip++;
- break;
- case CEE_LDC_I4_M1:
- case CEE_LDC_I4_0:
- case CEE_LDC_I4_1:
- case CEE_LDC_I4_2:
- case CEE_LDC_I4_3:
- case CEE_LDC_I4_4:
- case CEE_LDC_I4_5:
- case CEE_LDC_I4_6:
- case CEE_LDC_I4_7:
- case CEE_LDC_I4_8:
- AddIns(INTOP_LDC_I4);
- m_pLastIns->data[0] = opcode - CEE_LDC_I4_0;
- PushType(StackTypeI4, NULL);
- m_pLastIns->SetDVar(m_pStackPointer[-1].var);
- ip++;
- break;
- case CEE_LDC_I4_S:
- AddIns(INTOP_LDC_I4);
- m_pLastIns->data[0] = (int8_t)ip[1];
- PushType(StackTypeI4, NULL);
- m_pLastIns->SetDVar(m_pStackPointer[-1].var);
- ip += 2;
- break;
- case CEE_RET:
+ case InlineNone:
+ ip++;
+ break;
+ case InlineString:
+ case InlineType:
+ case InlineField:
+ case InlineMethod:
+ case InlineTok:
+ case InlineSig:
+ case ShortInlineR:
+ case InlineI:
+ ip += 5;
+ break;
+ case InlineVar:
+ ip += 3;
+ break;
+ case ShortInlineVar:
+ case ShortInlineI:
+ ip += 2;
+ break;
+ case ShortInlineBrTarget:
+ target = insOffset + 2 + (int8_t)ip [1];
+ if (target >= codeSize)
+ return false;
+ GetBB(target);
+ ip += 2;
+ GetBB((int32_t)(ip - codeStart));
+ break;
+ case InlineBrTarget:
+ target = insOffset + 5 + getI4LittleEndian(ip + 1);
+ if (target >= codeSize)
+ return false;
+ GetBB(target);
+ ip += 5;
+ GetBB((int32_t)(ip - codeStart));
+ break;
+ case InlineSwitch: {
+ uint32_t n = getI4LittleEndian(ip + 1);
+ ip += 5;
+ insOffset += 5 + 4 * n;
+ target = insOffset;
+ if (target >= codeSize)
+ return false;
+ GetBB(target);
+ for (uint32_t i = 0; i < n; i++)
{
- CORINFO_SIG_INFO sig = methodInfo->args;
- if (sig.retType == CORINFO_TYPE_VOID)
- {
- AddIns(INTOP_RET_VOID);
- }
- else if (sig.retType == CORINFO_TYPE_INT)
- {
- CHECK_STACK(1);
- AddIns(INTOP_RET);
- m_pStackPointer--;
- m_pLastIns->SetSVar(m_pStackPointer[0].var);
- }
- else
- {
- // FIXME
- assert(0);
- }
- ip++;
- break;
+ target = insOffset + getI4LittleEndian(ip);
+ if (target >= codeSize)
+ return false;
+ GetBB(target);
+ ip += 4;
}
- default:
- assert(0);
- break;
+ GetBB((int32_t)(ip - codeStart));
+ break;
+ }
+ case InlineR:
+ case InlineI8:
+ ip += 9;
+ break;
+ default:
+ assert(0);
}
+ if (opcode == CEE_THROW || opcode == CEE_ENDFINALLY || opcode == CEE_RETHROW)
+ GetBB((int32_t)(ip - codeStart));
}
-exit:
- return CORJIT_OK;
+ return true;
+}
+
+// ilOffset represents relative branch offset
+void InterpCompiler::EmitBranch(InterpOpcode opcode, int32_t ilOffset)
+{
+ int32_t target = (int32_t)(m_ip - m_pILCode) + ilOffset;
+ if (target < 0 || target >= m_ILCodeSize)
+ assert(0);
+
+ InterpBasicBlock *pTargetBB = m_ppOffsetToBB[target];
+ assert(pTargetBB != NULL);
+
+ EmitBBEndVarMoves(pTargetBB);
+ InitBBStackState(pTargetBB);
+
+ AddIns(opcode);
+ m_pLastIns->info.pTargetBB = pTargetBB;
+}
+
+void InterpCompiler::EmitOneArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize)
+{
+ CHECK_STACK_RET_VOID(1);
+ StackType argType = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-1].type;
+ // offset the opcode to obtain the type specific I4/I8/R4/R8 variant.
+ InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType - StackTypeI4);
+ m_pStackPointer--;
+ if (ilOffset)
+ {
+ EmitBranch(opcodeArgType, ilOffset + insSize);
+ m_pLastIns->SetSVar(m_pStackPointer[0].var);
+ }
+ else
+ {
+ AddIns(INTOP_NOP);
+ }
+}
+
+void InterpCompiler::EmitTwoArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize)
+{
+ CHECK_STACK_RET_VOID(2);
+ StackType argType1 = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-1].type;
+ StackType argType2 = (m_pStackPointer[-2].type == StackTypeO || m_pStackPointer[-2].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-2].type;
+
+ // Since branch opcodes only compare args of the same type, handle implicit conversions before
+ // emitting the conditional branch
+ if (argType1 == StackTypeI4 && argType2 == StackTypeI8)
+ {
+ EmitConv(m_pStackPointer - 1, m_pLastIns, StackTypeI8, INTOP_CONV_I8_I4);
+ argType1 = StackTypeI8;
+ }
+ else if (argType1 == StackTypeI8 && argType2 == StackTypeI4)
+ {
+ EmitConv(m_pStackPointer - 2, m_pLastIns, StackTypeI8, INTOP_CONV_I8_I4);
+ }
+ else if (argType1 == StackTypeR4 && argType2 == StackTypeR8)
+ {
+ EmitConv(m_pStackPointer - 1, m_pLastIns, StackTypeR8, INTOP_CONV_R8_R4);
+ argType1 = StackTypeR8;
+ }
+ else if (argType1 == StackTypeR8 && argType2 == StackTypeR4)
+ {
+ EmitConv(m_pStackPointer - 2, m_pLastIns, StackTypeR8, INTOP_CONV_R8_R4);
+ }
+ else if (argType1 != argType2)
+ {
+ m_hasInvalidCode = true;
+ return;
+ }
+
+ // offset the opcode to obtain the type specific I4/I8/R4/R8 variant.
+ InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType1 - StackTypeI4);
+ m_pStackPointer -= 2;
+
+ if (ilOffset)
+ {
+ EmitBranch(opcodeArgType, ilOffset + insSize);
+ m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+ }
+ else
+ {
+ AddIns(INTOP_NOP);
+ }
+}
+
+
+void InterpCompiler::EmitLoadVar(int32_t var)
+{
+ InterpType interpType = m_pVars[var].interpType;
+ int32_t size = m_pVars[var].size;
+ CORINFO_CLASS_HANDLE clsHnd = m_pVars[var].clsHnd;
+
+ if (interpType == InterpTypeVT)
+ PushTypeVT(clsHnd, size);
+ else
+ PushInterpType(interpType, clsHnd);
+
+ AddIns(InterpGetMovForType(interpType, true));
+ m_pLastIns->SetSVar(var);
+ m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+ if (interpType == InterpTypeVT)
+ m_pLastIns->data[0] = size;
+}
+
+void InterpCompiler::EmitStoreVar(int32_t var)
+{
+ InterpType interpType = m_pVars[var].interpType;
+ CHECK_STACK_RET_VOID(1);
+
+#ifdef TARGET_64BIT
+ // nint and int32 can be used interchangeably. Add implicit conversions.
+ if (m_pStackPointer[-1].type == StackTypeI4 && g_stackTypeFromInterpType[interpType] == StackTypeI8)
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+#endif
+ if (m_pStackPointer[-1].type == StackTypeR4 && g_stackTypeFromInterpType[interpType] == StackTypeR8)
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+ else if (m_pStackPointer[-1].type == StackTypeR8 && g_stackTypeFromInterpType[interpType] == StackTypeR4)
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_R8);
+
+ m_pStackPointer--;
+ AddIns(InterpGetMovForType(interpType, false));
+ m_pLastIns->SetSVar(m_pStackPointer[0].var);
+ m_pLastIns->SetDVar(var);
+ if (interpType == InterpTypeVT)
+ m_pLastIns->data[0] = m_pVars[var].size;
+}
+
+void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase)
+{
+ CHECK_STACK_RET_VOID(2);
+ StackType type1 = m_pStackPointer[-2].type;
+ StackType type2 = m_pStackPointer[-1].type;
+
+ StackType typeRes;
+
+ if (opBase == INTOP_ADD_I4 && (type1 == StackTypeByRef || type2 == StackTypeByRef))
+ {
+ if (type1 == type2)
+ INVALID_CODE_RET_VOID;
+ if (type1 == StackTypeByRef)
+ {
+ if (type2 == StackTypeI4)
+ {
+#ifdef TARGET_64BIT
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+ type2 = StackTypeI8;
+#endif
+ typeRes = StackTypeByRef;
+ }
+ else if (type2 == StackTypeI)
+ {
+ typeRes = StackTypeByRef;
+ }
+ else
+ {
+ INVALID_CODE_RET_VOID;
+ }
+ }
+ else
+ {
+ // type2 == StackTypeByRef
+ if (type1 == StackTypeI4)
+ {
+#ifdef TARGET_64BIT
+ EmitConv(m_pStackPointer - 2, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+ type1 = StackTypeI8;
+#endif
+ typeRes = StackTypeByRef;
+ }
+ else if (type1 == StackTypeI)
+ {
+ typeRes = StackTypeByRef;
+ }
+ else
+ {
+ INVALID_CODE_RET_VOID;
+ }
+ }
+ }
+ else if (opBase == INTOP_SUB_I4 && type1 == StackTypeByRef)
+ {
+ if (type2 == StackTypeI4)
+ {
+#ifdef TARGET_64BIT
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+ type2 = StackTypeI8;
+#endif
+ typeRes = StackTypeByRef;
+ }
+ else if (type2 == StackTypeI)
+ {
+ typeRes = StackTypeByRef;
+ }
+ else if (type2 == StackTypeByRef)
+ {
+ typeRes = StackTypeI;
+ }
+ else
+ {
+ INVALID_CODE_RET_VOID;
+ }
+ }
+ else
+ {
+#if SIZEOF_VOID_P == 8
+ if (type1 == StackTypeI8 && type2 == StackTypeI4)
+ {
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+ type2 = StackTypeI8;
+ }
+ else if (type1 == StackTypeI4 && type2 == StackTypeI8)
+ {
+ EmitConv(m_pStackPointer - 2, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+ type1 = StackTypeI8;
+ }
+#endif
+ if (type1 == StackTypeR8 && type2 == StackTypeR4)
+ {
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+ type2 = StackTypeR8;
+ }
+ else if (type1 == StackTypeR4 && type2 == StackTypeR8)
+ {
+ EmitConv(m_pStackPointer - 2, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+ type1 = StackTypeR8;
+ }
+ if (type1 != type2)
+ INVALID_CODE_RET_VOID;
+
+ typeRes = type1;
+ }
+
+ // The argument opcode is for the base _I4 instruction. Depending on the type of the result
+ // we compute the specific variant, _I4/_I8/_R4 or R8.
+ int32_t typeOffset = ((typeRes == StackTypeByRef) ? StackTypeI : typeRes) - StackTypeI4;
+ int32_t finalOpcode = opBase + typeOffset;
+
+ m_pStackPointer -= 2;
+ AddIns(finalOpcode);
+ m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+ PushStackType(typeRes, NULL);
+ m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+void InterpCompiler::EmitUnaryArithmeticOp(int32_t opBase)
+{
+ CHECK_STACK_RET_VOID(1);
+ StackType stackType = m_pStackPointer[-1].type;
+ int32_t finalOpcode = opBase + (stackType - StackTypeI4);
+
+ if (stackType == StackTypeByRef || stackType == StackTypeO)
+ INVALID_CODE_RET_VOID;
+ if (opBase == INTOP_NOT_I4 && (stackType != StackTypeI4 && stackType != StackTypeI8))
+ INVALID_CODE_RET_VOID;
+
+ m_pStackPointer--;
+ AddIns(finalOpcode);
+ m_pLastIns->SetSVar(m_pStackPointer[0].var);
+ PushStackType(stackType, NULL);
+ m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+void InterpCompiler::EmitShiftOp(int32_t opBase)
+{
+ CHECK_STACK_RET_VOID(2);
+ StackType stackType = m_pStackPointer[-2].type;
+ StackType shiftAmountType = m_pStackPointer[-1].type;
+ int32_t typeOffset = stackType - StackTypeI4;
+ int32_t finalOpcode = opBase + typeOffset;
+
+ if ((stackType != StackTypeI4 && stackType != StackTypeI8) ||
+ (shiftAmountType != StackTypeI4 && shiftAmountType != StackTypeI))
+ INVALID_CODE_RET_VOID;
+
+ m_pStackPointer -= 2;
+ AddIns(finalOpcode);
+ m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+ PushStackType(stackType, NULL);
+ m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+void InterpCompiler::EmitCompareOp(int32_t opBase)
+{
+ CHECK_STACK_RET_VOID(2);
+ if (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef)
+ {
+ AddIns(opBase + StackTypeI - StackTypeI4);
+ }
+ else
+ {
+ if (m_pStackPointer[-1].type == StackTypeR4 && m_pStackPointer[-2].type == StackTypeR8)
+ EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+ if (m_pStackPointer[-1].type == StackTypeR8 && m_pStackPointer[-2].type == StackTypeR4)
+ EmitConv(m_pStackPointer - 2, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+ AddIns(opBase + m_pStackPointer[-1].type - StackTypeI4);
+ }
+ m_pStackPointer -= 2;
+ m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+ PushStackType(StackTypeI4, NULL);
+ m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+int32_t InterpCompiler::GetDataItemIndex(void *data)
+{
+ int32_t index = m_dataItems.Find(data);
+ if (index != -1)
+ return index;
+
+ return m_dataItems.Add(data);
+}
+
+int32_t InterpCompiler::GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle)
+{
+ size_t data = (size_t)mHandle | INTERP_METHOD_DESC_TAG;
+ return GetDataItemIndex((void*)data);
+}
+
+bool InterpCompiler::EmitCallIntrinsics(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO sig)
+{
+ const char *className = NULL;
+ const char *namespaceName = NULL;
+ const char *methodName = m_compHnd->getMethodNameFromMetadata(method, &className, &namespaceName, NULL, 0);
+ int32_t opcode = -1;
+
+ if (namespaceName && !strcmp(namespaceName, "System"))
+ {
+ if (className && !strcmp(className, "Environment"))
+ {
+ if (methodName && !strcmp(methodName, "FailFast"))
+ opcode = INTOP_FAILFAST; // to be removed, not really an intrinsic
+ }
+ }
+
+ if (opcode != -1)
+ {
+ AddIns(opcode);
+ return true;
+ }
+
+ return false;
+}
+
+// Emits an interpreter call instruction for the CEE_CALL opcode at m_ip.
+// Resolves the method token, pops the call arguments off the IL stack and
+// records their vars in the call info, then pushes the return value (if any).
+// constrainedClass / readonly / tailcall are the pending IL prefixes;
+// NOTE(review): they are currently unused here — presumably to be honored
+// once constrained/tail calls are implemented; confirm.
+void InterpCompiler::EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall)
+{
+    uint32_t token = getU4LittleEndian(m_ip + 1);
+    CORINFO_RESOLVED_TOKEN resolvedToken;
+
+    resolvedToken.tokenScope = m_compScopeHnd;
+    resolvedToken.tokenContext = METHOD_BEING_COMPILED_CONTEXT();
+    resolvedToken.token = token;
+    resolvedToken.tokenType = CORINFO_TOKENKIND_Method;
+    m_compHnd->resolveToken(&resolvedToken);
+
+    CORINFO_METHOD_HANDLE targetMethod = resolvedToken.hMethod;
+
+    CORINFO_SIG_INFO targetSignature;
+    m_compHnd->getMethodSig(targetMethod, &targetSignature);
+
+    // Some well-known methods get a dedicated opcode instead of a call
+    if (EmitCallIntrinsics(targetMethod, targetSignature))
+    {
+        m_ip += 5; // skip CEE_CALL + 4-byte token
+        return;
+    }
+
+    // Process sVars: pop the args (including `this`, if present)
+    int numArgs = targetSignature.numArgs + targetSignature.hasThis();
+    m_pStackPointer -= numArgs;
+
+    // +1 slot for the -1 terminator (CALL_ARGS_TERMINATOR)
+    int *callArgs = (int*) AllocMemPool((numArgs + 1) * sizeof(int));
+    for (int i = 0; i < numArgs; i++)
+        callArgs[i] = m_pStackPointer [i].var;
+    callArgs[numArgs] = -1;
+
+    // Process dVar: the var receiving the return value
+    int32_t dVar;
+    if (targetSignature.retType != CORINFO_TYPE_VOID)
+    {
+        InterpType interpType = GetInterpType(targetSignature.retType);
+
+        if (interpType == InterpTypeVT)
+        {
+            int32_t size = m_compHnd->getClassSize(targetSignature.retTypeClass);
+            PushTypeVT(targetSignature.retTypeClass, size);
+        }
+        else
+        {
+            PushInterpType(interpType, NULL);
+        }
+        dVar = m_pStackPointer[-1].var;
+    }
+    else
+    {
+        // Create a new dummy var to serve as the dVar of the call
+        // FIXME Consider adding special dVar type (ex -1), that is
+        // resolved to null offset. The opcode shouldn't really write to it
+        PushStackType(StackTypeI4, NULL);
+        m_pStackPointer--;
+        dVar = m_pStackPointer[0].var;
+    }
+
+    // Emit call instruction
+    AddIns(INTOP_CALL);
+    m_pLastIns->SetDVar(dVar);
+    m_pLastIns->SetSVar(CALL_ARGS_SVAR);
+    m_pLastIns->data[0] = GetMethodDataItemIndex(targetMethod);
+
+    m_pLastIns->flags |= INTERP_INST_FLAG_CALL;
+    m_pLastIns->info.pCallInfo = (InterpCallInfo*)AllocMemPool0(sizeof (InterpCallInfo));
+    m_pLastIns->info.pCallInfo->pCallArgs = callArgs;
+
+    m_ip += 5; // skip CEE_CALL + 4-byte token
+}
+
+// Main IL import loop: translates the method's IL into interpreter IR.
+// Bblocks discovered by CreateBasicBlocks are emitted in IL order. A bblock
+// can only be emitted once its entry stack state is known; bblocks reachable
+// only by not-yet-emitted branches are deferred and picked up by re-running
+// the loop (retry_emit). Returns CORJIT_OK, or CORJIT_BADCODE if the IL is
+// detected to be invalid.
+int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
+{
+    // Pending IL prefixes (constrained. / readonly. / tail.) for the next call
+    bool readonly = false;
+    bool tailcall = false;
+    CORINFO_CLASS_HANDLE constrainedClass = NULL;
+    uint8_t *codeEnd;
+    int numArgs = m_methodInfo->args.hasThis() + m_methodInfo->args.numArgs;
+    bool emittedBBlocks, linkBBlocks, needsRetryEmit;
+    m_ip = m_pILCode = methodInfo->ILCode;
+    m_ILCodeSize = (int32_t)methodInfo->ILCodeSize;
+
+    // One extra slot so the dummy dVar pushed for void calls always fits
+    m_stackCapacity = methodInfo->maxStack + 1;
+    m_pStackBase = m_pStackPointer = (StackInfo*)AllocTemporary(sizeof(StackInfo) * m_stackCapacity);
+
+    m_pEntryBB = AllocBB(0);
+    m_pEntryBB->emitState = BBStateEmitting;
+    m_pEntryBB->stackHeight = 0;
+    m_pCBB = m_pEntryBB;
+
+    if (!CreateBasicBlocks(methodInfo))
+    {
+        m_hasInvalidCode = true;
+        goto exit_bad_code;
+    }
+
+    codeEnd = m_ip + m_ILCodeSize;
+
+    linkBBlocks = true;
+    needsRetryEmit = false;
+retry_emit:
+    emittedBBlocks = false;
+    while (m_ip < codeEnd)
+    {
+        // Check here for every opcode to avoid code bloat
+        if (m_hasInvalidCode)
+            goto exit_bad_code;
+
+        int32_t insOffset = (int32_t)(m_ip - m_pILCode);
+        m_currentILOffset = insOffset;
+
+        InterpBasicBlock *pNewBB = m_ppOffsetToBB[insOffset];
+        if (pNewBB != NULL && m_pCBB != pNewBB)
+        {
+            INTERP_DUMP("BB%d (IL_%04x):\n", pNewBB->index, pNewBB->ilOffset);
+            // If we were emitting into previous bblock, we are finished now
+            if (m_pCBB->emitState == BBStateEmitting)
+                m_pCBB->emitState = BBStateEmitted;
+            // If the new bblock was already emitted, skip its instructions
+            if (pNewBB->emitState == BBStateEmitted)
+            {
+                if (linkBBlocks)
+                {
+                    LinkBBs(m_pCBB, pNewBB);
+                    // Further emitting can only start at a point where the bblock is not fallthrough
+                    linkBBlocks = false;
+                }
+                // If the bblock was fully emitted it means we already iterated at least once over
+                // all instructions so we have `pNextBB` initialized, unless it is the last bblock.
+                // Skip through all emitted bblocks.
+                m_pCBB = pNewBB;
+                while (m_pCBB->pNextBB && m_pCBB->pNextBB->emitState == BBStateEmitted)
+                    m_pCBB = m_pCBB->pNextBB;
+
+                if (m_pCBB->pNextBB)
+                    m_ip = m_pILCode + m_pCBB->pNextBB->ilOffset;
+                else
+                    m_ip = codeEnd;
+
+                continue;
+            }
+            else
+            {
+                assert(pNewBB->emitState == BBStateNotEmitted);
+            }
+            // We are starting a new basic block. Change cbb and link them together
+            if (linkBBlocks)
+            {
+                // By default we link cbb with the new starting bblock, unless the previous
+                // instruction is an unconditional branch (BR, LEAVE, ENDFINALLY)
+                LinkBBs(m_pCBB, pNewBB);
+                EmitBBEndVarMoves(pNewBB);
+                pNewBB->emitState = BBStateEmitting;
+                emittedBBlocks = true;
+                if (pNewBB->stackHeight >= 0)
+                {
+                    MergeStackTypeInfo(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight);
+                    // This is relevant only for copying the vars associated with the values on the stack
+                    memcpy(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight * sizeof(StackInfo));
+                    m_pStackPointer = m_pStackBase + pNewBB->stackHeight;
+                }
+                else
+                {
+                    // This bblock has not been branched to yet. Initialize its stack state
+                    InitBBStackState(pNewBB);
+                }
+                // linkBBlocks remains true, which is the default
+            }
+            else
+            {
+                if (pNewBB->stackHeight >= 0)
+                {
+                    // This is relevant only for copying the vars associated with the values on the stack
+                    memcpy(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight * sizeof(StackInfo));
+                    m_pStackPointer = m_pStackBase + pNewBB->stackHeight;
+                    pNewBB->emitState = BBStateEmitting;
+                    emittedBBlocks = true;
+                    linkBBlocks = true;
+                }
+                else
+                {
+                    INTERP_DUMP("BB%d without initialized stack\n", pNewBB->index);
+                    assert(pNewBB->emitState == BBStateNotEmitted);
+                    needsRetryEmit = true;
+                    // linking to its next bblock, if its the case, will only happen
+                    // after we actually emit the bblock
+                    linkBBlocks = false;
+                    // If we had pNewBB->pNextBB initialized, here we could skip to its il offset directly.
+                    // We will just skip all instructions instead, since it doesn't seem that problematic.
+                }
+            }
+            if (!m_pCBB->pNextBB)
+                m_pCBB->pNextBB = pNewBB;
+            m_pCBB = pNewBB;
+        }
+
+        int32_t opcodeSize = CEEOpcodeSize(m_ip, codeEnd);
+        if (m_pCBB->emitState != BBStateEmitting)
+        {
+            // If we are not really emitting, just skip the instructions in the bblock
+            m_ip += opcodeSize;
+            continue;
+        }
+
+        m_ppOffsetToBB[insOffset] = m_pCBB;
+
+#ifdef DEBUG
+        if (m_verbose)
+        {
+            const uint8_t *ip = m_ip;
+            printf("IL_%04x %-10s, sp %d, %s",
+                (int32_t)(m_ip - m_pILCode),
+                CEEOpName(CEEDecodeOpcode(&ip)), (int32_t)(m_pStackPointer - m_pStackBase),
+                m_pStackPointer > m_pStackBase ? g_stackTypeString[m_pStackPointer[-1].type] : "  ");
+            if (m_pStackPointer > m_pStackBase &&
+                    (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeVT) &&
+                    m_pStackPointer[-1].clsHnd != NULL)
+                PrintClassName(m_pStackPointer[-1].clsHnd);
+            printf("\n");
+        }
+#endif
+
+        uint8_t opcode = *m_ip;
+        switch (opcode)
+        {
+            case CEE_NOP:
+                m_ip++;
+                break;
+            case CEE_LDC_I4_M1:
+            case CEE_LDC_I4_0:
+            case CEE_LDC_I4_1:
+            case CEE_LDC_I4_2:
+            case CEE_LDC_I4_3:
+            case CEE_LDC_I4_4:
+            case CEE_LDC_I4_5:
+            case CEE_LDC_I4_6:
+            case CEE_LDC_I4_7:
+            case CEE_LDC_I4_8:
+                AddIns(INTOP_LDC_I4);
+                // Yields -1 for CEE_LDC_I4_M1, 0..8 for the rest
+                m_pLastIns->data[0] = opcode - CEE_LDC_I4_0;
+                PushStackType(StackTypeI4, NULL);
+                m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+                m_ip++;
+                break;
+            case CEE_LDC_I4_S:
+                AddIns(INTOP_LDC_I4);
+                m_pLastIns->data[0] = (int8_t)m_ip[1];
+                PushStackType(StackTypeI4, NULL);
+                m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+                m_ip += 2;
+                break;
+            case CEE_LDC_I4:
+                AddIns(INTOP_LDC_I4);
+                m_pLastIns->data[0] = getI4LittleEndian(m_ip + 1);
+                PushStackType(StackTypeI4, NULL);
+                m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+                m_ip += 5;
+                break;
+            case CEE_LDNULL:
+                AddIns(INTOP_LDNULL);
+                PushStackType(StackTypeO, NULL);
+                m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+                m_ip++;
+                break;
+
+            // Args live in vars [0, numArgs), IL locals follow at numArgs
+            case CEE_LDARG_S:
+                EmitLoadVar(m_ip[1]);
+                m_ip += 2;
+                break;
+            case CEE_LDARG_0:
+            case CEE_LDARG_1:
+            case CEE_LDARG_2:
+            case CEE_LDARG_3:
+                EmitLoadVar(*m_ip - CEE_LDARG_0);
+                m_ip++;
+                break;
+            case CEE_STARG_S:
+                EmitStoreVar(m_ip[1]);
+                m_ip += 2;
+                break;
+            case CEE_LDLOC_S:
+                EmitLoadVar(numArgs + m_ip[1]);
+                m_ip += 2;
+                break;
+            case CEE_LDLOC_0:
+            case CEE_LDLOC_1:
+            case CEE_LDLOC_2:
+            case CEE_LDLOC_3:
+                EmitLoadVar(numArgs + *m_ip - CEE_LDLOC_0);
+                m_ip++;
+                break;
+            case CEE_STLOC_S:
+                EmitStoreVar(numArgs + m_ip[1]);
+                m_ip += 2;
+                break;
+            case CEE_STLOC_0:
+            case CEE_STLOC_1:
+            case CEE_STLOC_2:
+            case CEE_STLOC_3:
+                EmitStoreVar(numArgs + *m_ip - CEE_STLOC_0);
+                m_ip++;
+                break;
+
+            case CEE_RET:
+            {
+                CORINFO_SIG_INFO sig = methodInfo->args;
+                InterpType retType = GetInterpType(sig.retType);
+
+                if (retType == InterpTypeVoid)
+                {
+                    AddIns(INTOP_RET_VOID);
+                }
+                else if (retType == InterpTypeVT)
+                {
+                    CHECK_STACK(1);
+                    AddIns(INTOP_RET_VT);
+                    m_pStackPointer--;
+                    int32_t retVar = m_pStackPointer[0].var;
+                    m_pLastIns->SetSVar(retVar);
+                    // Value types are returned by copy; record the size to copy
+                    m_pLastIns->data[0] = m_pVars[retVar].size;
+                }
+                else
+                {
+                    CHECK_STACK(1);
+                    AddIns(INTOP_RET);
+                    m_pStackPointer--;
+                    m_pLastIns->SetSVar(m_pStackPointer[0].var);
+                }
+                m_ip++;
+                break;
+            }
+            case CEE_CONV_U1:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_R8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_I4);
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_I8);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_I1:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_R8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_I4);
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_I8);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_U2:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_R8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_I4);
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_I8);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_I2:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_R8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_I4);
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_I8);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            // conv.u / conv.i produce native int; the opcode depends on the
+            // pointer size of the target
+            case CEE_CONV_U:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR8:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U8_R8);
+#else
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U4_R8);
+#endif
+                        break;
+                    case StackTypeR4:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U8_R4);
+#else
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U4_R4);
+#endif
+                        break;
+                    case StackTypeI4:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_U4);
+#endif
+                        break;
+                    case StackTypeI8:
+#ifndef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8);
+#endif
+                        break;
+                    case StackTypeByRef:
+                    case StackTypeO:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_I:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR8:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_R8);
+#else
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I4_R8);
+#endif
+                        break;
+                    case StackTypeR4:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_R4);
+#else
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I4_R4);
+#endif
+                        break;
+                    case StackTypeI4:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_I4);
+#endif
+                        break;
+                    case StackTypeO:
+                    case StackTypeByRef:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8);
+                        break;
+                    case StackTypeI8:
+#ifndef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8);
+#endif
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_U4:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U4_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U4_R8);
+                        break;
+                    case StackTypeI4:
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8);
+                        break;
+                    case StackTypeByRef:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_P);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_I4:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I4_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I4_R8);
+                        break;
+                    case StackTypeI4:
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8);
+                        break;
+                    case StackTypeByRef:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_P);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_I8:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_R8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+                        break;
+                    case StackTypeI8:
+                        break;
+                    case StackTypeByRef:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_MOV_8);
+#else
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+#endif
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_R4:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_R8);
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_I8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_I4);
+                        break;
+                    case StackTypeR4:
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_R8:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_I4);
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_I8);
+                        break;
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+                        break;
+                    case StackTypeR8:
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_U8:
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_U4);
+                        break;
+                    case StackTypeI8:
+                        break;
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_U8_R4);
+                        break;
+                    case StackTypeR8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_U8_R8);
+                        break;
+                    case StackTypeByRef:
+#ifdef TARGET_64BIT
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_MOV_8);
+#else
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_U4);
+#endif
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_CONV_R_UN:
+                // FIX: this conversion opcode was missing the stack-underflow
+                // check that every other CEE_CONV_* case performs
+                CHECK_STACK(1);
+                switch (m_pStackPointer[-1].type)
+                {
+                    case StackTypeR4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+                        break;
+                    case StackTypeR8:
+                        break;
+                    case StackTypeI8:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R_UN_I8);
+                        break;
+                    case StackTypeI4:
+                        EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R_UN_I4);
+                        break;
+                    default:
+                        assert(0);
+                }
+                m_ip++;
+                break;
+            case CEE_SWITCH:
+            {
+                m_ip++;
+                uint32_t n = getU4LittleEndian(m_ip);
+                // Format of switch instruction is opcode + srcVal + n + T1 + T2 + ... + Tn
+                AddInsExplicit(INTOP_SWITCH, n + 3);
+                m_pLastIns->data[0] = n;
+                m_ip += 4;
+                // Branch targets are relative to the first instruction after the switch
+                const uint8_t *nextIp = m_ip + n * 4;
+                m_pStackPointer--;
+                m_pLastIns->SetSVar(m_pStackPointer->var);
+                InterpBasicBlock **targetBBTable = (InterpBasicBlock**)AllocMemPool(sizeof(InterpBasicBlock*) * n);
+
+                for (uint32_t i = 0; i < n; i++)
+                {
+                    int32_t offset = getU4LittleEndian(m_ip);
+                    uint32_t target = (uint32_t)(nextIp - m_pILCode + offset);
+                    InterpBasicBlock *targetBB = m_ppOffsetToBB[target];
+                    assert(targetBB);
+
+                    InitBBStackState(targetBB);
+                    targetBBTable[i] = targetBB;
+                    LinkBBs(m_pCBB, targetBB);
+                    m_ip += 4;
+                }
+                m_pLastIns->info.ppTargetBBTable = targetBBTable;
+                break;
+            }
+            case CEE_BR:
+            {
+                int32_t offset = getI4LittleEndian(m_ip + 1);
+                // A branch to the next instruction is a no-op
+                if (offset)
+                {
+                    EmitBranch(INTOP_BR, 5 + offset);
+                    linkBBlocks = false;
+                }
+                m_ip += 5;
+                break;
+            }
+            case CEE_BR_S:
+            {
+                int32_t offset = (int8_t)m_ip[1];
+                // A branch to the next instruction is a no-op
+                if (offset)
+                {
+                    EmitBranch(INTOP_BR, 2 + offset);
+                    linkBBlocks = false;
+                }
+                m_ip += 2;
+                break;
+            }
+            case CEE_BRFALSE:
+                EmitOneArgBranch(INTOP_BRFALSE_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BRFALSE_S:
+                EmitOneArgBranch(INTOP_BRFALSE_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BRTRUE:
+                EmitOneArgBranch(INTOP_BRTRUE_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BRTRUE_S:
+                EmitOneArgBranch(INTOP_BRTRUE_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BEQ:
+                EmitTwoArgBranch(INTOP_BEQ_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BEQ_S:
+                EmitTwoArgBranch(INTOP_BEQ_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BGE:
+                EmitTwoArgBranch(INTOP_BGE_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BGE_S:
+                EmitTwoArgBranch(INTOP_BGE_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BGT:
+                EmitTwoArgBranch(INTOP_BGT_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BGT_S:
+                EmitTwoArgBranch(INTOP_BGT_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BLT:
+                EmitTwoArgBranch(INTOP_BLT_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BLT_S:
+                EmitTwoArgBranch(INTOP_BLT_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BLE:
+                EmitTwoArgBranch(INTOP_BLE_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BLE_S:
+                EmitTwoArgBranch(INTOP_BLE_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BNE_UN:
+                EmitTwoArgBranch(INTOP_BNE_UN_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BNE_UN_S:
+                EmitTwoArgBranch(INTOP_BNE_UN_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BGE_UN:
+                EmitTwoArgBranch(INTOP_BGE_UN_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BGE_UN_S:
+                EmitTwoArgBranch(INTOP_BGE_UN_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BGT_UN:
+                EmitTwoArgBranch(INTOP_BGT_UN_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BGT_UN_S:
+                EmitTwoArgBranch(INTOP_BGT_UN_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BLE_UN:
+                EmitTwoArgBranch(INTOP_BLE_UN_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BLE_UN_S:
+                EmitTwoArgBranch(INTOP_BLE_UN_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+            case CEE_BLT_UN:
+                EmitTwoArgBranch(INTOP_BLT_UN_I4, getI4LittleEndian(m_ip + 1), 5);
+                m_ip += 5;
+                break;
+            case CEE_BLT_UN_S:
+                EmitTwoArgBranch(INTOP_BLT_UN_I4, (int8_t)m_ip[1], 2);
+                m_ip += 2;
+                break;
+
+            // The *_I4 opcodes passed below are the base opcodes; the Emit*
+            // helpers select the variant matching the operand stack types
+            case CEE_ADD:
+                EmitBinaryArithmeticOp(INTOP_ADD_I4);
+                m_ip++;
+                break;
+            case CEE_SUB:
+                EmitBinaryArithmeticOp(INTOP_SUB_I4);
+                m_ip++;
+                break;
+            case CEE_MUL:
+                EmitBinaryArithmeticOp(INTOP_MUL_I4);
+                m_ip++;
+                break;
+            case CEE_AND:
+                EmitBinaryArithmeticOp(INTOP_AND_I4);
+                m_ip++;
+                break;
+            case CEE_OR:
+                EmitBinaryArithmeticOp(INTOP_OR_I4);
+                m_ip++;
+                break;
+            case CEE_XOR:
+                EmitBinaryArithmeticOp(INTOP_XOR_I4);
+                m_ip++;
+                break;
+            case CEE_SHL:
+                EmitShiftOp(INTOP_SHL_I4);
+                m_ip++;
+                break;
+            case CEE_SHR:
+                EmitShiftOp(INTOP_SHR_I4);
+                m_ip++;
+                break;
+            case CEE_SHR_UN:
+                EmitShiftOp(INTOP_SHR_UN_I4);
+                m_ip++;
+                break;
+            case CEE_NEG:
+                EmitUnaryArithmeticOp(INTOP_NEG_I4);
+                m_ip++;
+                break;
+            case CEE_NOT:
+                EmitUnaryArithmeticOp(INTOP_NOT_I4);
+                m_ip++;
+                break;
+            case CEE_CALL:
+                EmitCall(constrainedClass, readonly, tailcall);
+                // Call prefixes apply only to the call they precede
+                constrainedClass = NULL;
+                readonly = false;
+                tailcall = false;
+                break;
+
+            case CEE_PREFIX1:
+                m_ip++;
+                // Two-byte opcodes are encoded as 0xFE <byte>; +256 maps them
+                // to their CEE_* enum values
+                switch (*m_ip + 256)
+                {
+                    case CEE_LDARG:
+                        EmitLoadVar(getU2LittleEndian(m_ip + 1));
+                        m_ip += 3;
+                        break;
+                    case CEE_STARG:
+                        EmitStoreVar(getU2LittleEndian(m_ip + 1));
+                        m_ip += 3;
+                        break;
+                    case CEE_LDLOC:
+                        EmitLoadVar(numArgs + getU2LittleEndian(m_ip + 1));
+                        m_ip += 3;
+                        break;
+                    case CEE_STLOC:
+                        // FIX: removed a stray trailing backslash (line
+                        // continuation) that spliced this statement with the
+                        // following one
+                        EmitStoreVar(numArgs + getU2LittleEndian(m_ip + 1));
+                        m_ip += 3;
+                        break;
+                    case CEE_CEQ:
+                        EmitCompareOp(INTOP_CEQ_I4);
+                        m_ip++;
+                        break;
+                    case CEE_CGT:
+                        EmitCompareOp(INTOP_CGT_I4);
+                        m_ip++;
+                        break;
+                    case CEE_CGT_UN:
+                        EmitCompareOp(INTOP_CGT_UN_I4);
+                        m_ip++;
+                        break;
+                    case CEE_CLT:
+                        EmitCompareOp(INTOP_CLT_I4);
+                        m_ip++;
+                        break;
+                    case CEE_CLT_UN:
+                        EmitCompareOp(INTOP_CLT_UN_I4);
+                        m_ip++;
+                        break;
+                    case CEE_CONSTRAINED:
+                    {
+                        uint32_t token = getU4LittleEndian(m_ip + 1);
+                        CORINFO_RESOLVED_TOKEN resolvedToken;
+
+                        resolvedToken.tokenScope = m_compScopeHnd;
+                        resolvedToken.tokenContext = METHOD_BEING_COMPILED_CONTEXT();
+                        resolvedToken.token = token;
+                        resolvedToken.tokenType = CORINFO_TOKENKIND_Constrained;
+                        m_compHnd->resolveToken(&resolvedToken);
+                        constrainedClass = resolvedToken.hClass;
+                        m_ip += 5;
+                        break;
+                    }
+                    case CEE_READONLY:
+                        readonly = true;
+                        m_ip++;
+                        break;
+                    case CEE_TAILCALL:
+                        tailcall = true;
+                        m_ip++;
+                        break;
+                    default:
+                        assert(0);
+                        break;
+                }
+                break;
+
+            default:
+                assert(0);
+                break;
+        }
+    }
+
+    if (m_pCBB->emitState == BBStateEmitting)
+        m_pCBB->emitState = BBStateEmitted;
+
+    // If no bblocks were emitted during the last iteration, there is no point to try again
+    // Some bblocks are just unreachable in the code.
+    if (needsRetryEmit && emittedBBlocks)
+    {
+        m_ip = m_pILCode;
+        m_pCBB = m_pEntryBB;
+
+        linkBBlocks = false;
+        needsRetryEmit = false;
+        INTERP_DUMP("retry emit\n");
+        goto retry_emit;
+    }
+
+    UnlinkUnreachableBBlocks();
+
+    return CORJIT_OK;
+exit_bad_code:
+    return CORJIT_BADCODE;
+}
+
+// Removes from the bblock chain every bblock that was never emitted, clearing
+// its entry in the offset map as well. The entry bblock is always emitted, so
+// pPrev always points at an emitted bblock while walking the list.
+void InterpCompiler::UnlinkUnreachableBBlocks()
+{
+    InterpBasicBlock *pPrev = m_pEntryBB;
+    InterpBasicBlock *pCur = pPrev->pNextBB;
+    while (pCur != NULL)
+    {
+        if (pCur->emitState == BBStateNotEmitted)
+        {
+            // Splice pCur out of both the offset map and the linked list
+            m_ppOffsetToBB[pCur->ilOffset] = NULL;
+            pPrev->pNextBB = pCur->pNextBB;
+            pCur = pPrev->pNextBB;
+        }
+        else
+        {
+            pPrev = pCur;
+            pCur = pCur->pNextBB;
+        }
+    }
+}
+
+// Prints the name of cls to stdout (truncated to the local buffer size).
+void InterpCompiler::PrintClassName(CORINFO_CLASS_HANDLE cls)
+{
+    char className[100];
+    // Pass sizeof so the capacity always matches the buffer declaration
+    m_compHnd->printClassName(cls, className, sizeof(className));
+    printf("%s", className);
+}
+
+// Prints the method's name to stdout as "ClassName.MethodName"
+// (each part truncated to the local buffer size).
+void InterpCompiler::PrintMethodName(CORINFO_METHOD_HANDLE method)
+{
+    char methodName[100];
+
+    CORINFO_CLASS_HANDLE cls = m_compHnd->getMethodClass(method);
+    PrintClassName(cls);
+
+    // Pass sizeof so the capacity always matches the buffer declaration
+    m_compHnd->printMethodName(method, methodName, sizeof(methodName));
+    printf(".%s", methodName);
+}
+
+// Dumps the IR of the whole method, one basic block at a time, in layout order.
+void InterpCompiler::PrintCode()
+{
+    InterpBasicBlock *pBB = m_pEntryBB;
+    while (pBB != NULL)
+    {
+        PrintBBCode(pBB);
+        pBB = pBB->pNextBB;
+    }
+}
+
+// Prints the bblock header followed by every instruction it contains.
+void InterpCompiler::PrintBBCode(InterpBasicBlock *pBB)
+{
+    printf("BB%d:\n", pBB->index);
+    InterpInst *pIns = pBB->pFirstIns;
+    while (pIns != NULL)
+    {
+        PrintIns(pIns);
+        pIns = pIns->pNext;
+    }
+}
+
+// Pretty-prints a single IR instruction: IL offset, opcode name, dVar, sVars
+// (expanding call arg lists) and the opcode-specific data.
+void InterpCompiler::PrintIns(InterpInst *ins)
+{
+    int32_t opcode = ins->opcode;
+    // Instructions synthesized by the compiler carry no IL offset (-1)
+    if (ins->ilOffset == -1)
+        printf("IL_----: %-14s", InterpOpName(opcode));
+    else
+        printf("IL_%04x: %-14s", ins->ilOffset, InterpOpName(opcode));
+
+    if (g_interpOpDVars[opcode] > 0)
+        printf(" [%d <-", ins->dVar);
+    else
+        printf(" [nil <-");
+
+    if (g_interpOpSVars[opcode] > 0)
+    {
+        for (int i = 0; i < g_interpOpSVars[opcode]; i++)
+        {
+            if (ins->sVars[i] == CALL_ARGS_SVAR)
+            {
+                // Call instructions keep their arg vars in a side table,
+                // terminated by CALL_ARGS_TERMINATOR
+                printf(" c:");
+                if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs)
+                {
+                    int *callArgs = ins->info.pCallInfo->pCallArgs;
+                    while (*callArgs != CALL_ARGS_TERMINATOR)
+                    {
+                        printf(" %d", *callArgs);
+                        callArgs++;
+                    }
+                }
+            }
+            else
+            {
+                printf(" %d", ins->sVars[i]);
+            }
+        }
+        printf("],");
+    }
+    else
+    {
+        printf(" nil],");
+    }
+
+    // LDLOCA has special semantics: it stores a var index in sVars[0] even
+    // though it is not counted as a source var by g_interpOpSVars
+    if (opcode == INTOP_LDLOCA)
+        printf(" %d", ins->sVars[0]);
+    else
+        PrintInsData(ins, ins->ilOffset, &ins->data[0], ins->opcode);
+    printf("\n");
+}
+
+// Prints the opcode-specific data of an instruction. Handles both forms:
+// when `ins` is non-NULL the IR still carries basic block pointers (BB%d is
+// printed); when `ins` is NULL the data comes from the final compiled stream
+// and raw IR offsets (IR_%04x) are printed instead.
+void InterpCompiler::PrintInsData(InterpInst *ins, int32_t insOffset, const int32_t *pData, int32_t opcode)
+{
+    switch (g_interpOpArgType[opcode]) {
+        case InterpOpNoArgs:
+            break;
+        case InterpOpInt:
+            printf(" %d", *pData);
+            break;
+        case InterpOpBranch:
+            if (ins)
+                printf(" BB%d", ins->info.pTargetBB->index);
+            else
+                printf(" IR_%04x", insOffset + *pData);
+            break;
+        case InterpOpSwitch:
+        {
+            // pData[0] is the target count, followed by the relative targets
+            int32_t n = *pData;
+            printf(" (");
+            for (int i = 0; i < n; i++)
+            {
+                if (i > 0)
+                    printf(", ");
+
+                if (ins)
+                    printf("BB%d", ins->info.ppTargetBBTable[i]->index);
+                else
+                    printf("IR_%04x", insOffset + 3 + i + *(pData + 1 + i));
+            }
+            printf(")");
+            break;
+        }
+        case InterpOpMethodToken:
+        {
+            // The data item holds a tagged method handle; strip the tag
+            CORINFO_METHOD_HANDLE mh = (CORINFO_METHOD_HANDLE)((size_t)m_dataItems.Get(*pData) & ~INTERP_METHOD_DESC_TAG);
+            printf(" ");
+            PrintMethodName(mh);
+            break;
+        }
+        default:
+            assert(0);
+            break;
+    }
+}
+
+// Disassembles the final compiled code stream, one instruction at a time.
+void InterpCompiler::PrintCompiledCode()
+{
+    const int32_t *start = m_pMethodCode;
+    const int32_t *end = start + m_methodCodeSize;
+
+    for (const int32_t *ip = start; ip < end; ip = InterpNextOp(ip))
+        PrintCompiledIns(ip, start);
+}
+
+// Prints a single instruction from the final compiled code stream.
+// `ip` points at the opcode slot; `start` is the beginning of the method
+// code and is only used to compute the printed IR offset.
+void InterpCompiler::PrintCompiledIns(const int32_t *ip, const int32_t *start)
+{
+    int32_t opcode = *ip;
+    int32_t insOffset = (int32_t)(ip - start);
+
+    printf("IR_%04x: %-14s", insOffset, InterpOpName(opcode));
+    ip++;
+
+    // dVar and sVars, when present, immediately follow the opcode slot
+    if (g_interpOpDVars[opcode] > 0)
+        printf(" [%d <-", *ip++);
+    else
+        printf(" [nil <-");
+
+    if (g_interpOpSVars[opcode] > 0)
+    {
+        for (int i = 0; i < g_interpOpSVars[opcode]; i++)
+            printf(" %d", *ip++);
+        printf("],");
+    }
+    else
+    {
+        printf(" nil],");
+    }
+
+    // NULL ins: print raw IR offsets since BB info is gone at this point
+    PrintInsData(NULL, insOffset, ip, opcode);
+    printf("\n");
}
diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h
index fac63198fcb2d9..1f93a9a308f255 100644
--- a/src/coreclr/interpreter/compiler.h
+++ b/src/coreclr/interpreter/compiler.h
@@ -5,6 +5,7 @@
#define _COMPILER_H_
#include "intops.h"
+#include "datastructs.h"
// Types that can exist on the IL execution stack. They are used only during
// IL import compilation stage.
@@ -15,8 +16,13 @@ enum StackType {
StackTypeR8,
StackTypeO,
StackTypeVT,
- StackTypeMP,
- StackTypeF
+ StackTypeByRef,
+ StackTypeF,
+#ifdef TARGET_64BIT
+ StackTypeI = StackTypeI8
+#else
+ StackTypeI = StackTypeI4
+#endif
};
// Types relevant for interpreter vars and opcodes. They are used in the final
@@ -32,7 +38,8 @@ enum InterpType {
InterpTypeR8,
InterpTypeO,
InterpTypeVT,
- InterpTypeVOID,
+ InterpTypeByRef,
+ InterpTypeVoid,
#ifdef TARGET_64BIT
InterpTypeI = InterpTypeI8
#else
@@ -40,16 +47,40 @@ enum InterpType {
#endif
};
+#ifdef DEBUG
+#define INTERP_DUMP(...) \
+ { \
+ if (m_verbose) \
+ printf(__VA_ARGS__); \
+ }
+#else
+#define INTERP_DUMP(...)
+#endif
+
+struct InterpInst;
+struct InterpBasicBlock;
+
struct InterpCallInfo
{
// For call instructions, this represents an array of all call arg vars
// in the order they are pushed to the stack. This makes it easy to find
// all source vars for these types of opcodes. This is terminated with -1.
- int *pCallArgs;
- int callOffset;
+ int32_t *pCallArgs;
+ int32_t callOffset;
+ union {
+        // Array of call dependencies that need to be resolved before this call
+ TSList *callDeps;
+ // Stack end offset of call arguments
+ int32_t callEndOffset;
+ };
};
-struct InterpBasicBlock;
+enum InterpInstFlags
+{
+ INTERP_INST_FLAG_CALL = 0x01,
+ // Flag used internally by the var offset allocator
+ INTERP_INST_FLAG_ACTIVE_CALL = 0x02
+};
struct InterpInst
{
@@ -94,13 +125,23 @@ struct InterpInst
};
#define CALL_ARGS_SVAR -2
+#define CALL_ARGS_TERMINATOR -1
+struct StackInfo;
+
+enum InterpBBState
+{
+ BBStateNotEmitted,
+ BBStateEmitting,
+ BBStateEmitted
+};
struct InterpBasicBlock
{
int32_t index;
int32_t ilOffset, nativeOffset;
int32_t stackHeight;
+ StackInfo *pStackState;
InterpInst *pFirstIns, *pLastIns;
InterpBasicBlock *pNextBB;
@@ -108,18 +149,66 @@ struct InterpBasicBlock
int inCount, outCount;
InterpBasicBlock **ppInBBs;
InterpBasicBlock **ppOutBBs;
+
+ InterpBBState emitState;
+
+ InterpBasicBlock(int32_t index) : InterpBasicBlock(index, 0) { }
+
+ InterpBasicBlock(int32_t index, int32_t ilOffset)
+ {
+ this->index = index;
+ this->ilOffset = ilOffset;
+ nativeOffset = -1;
+ stackHeight = -1;
+
+ pFirstIns = pLastIns = NULL;
+ pNextBB = NULL;
+
+ inCount = 0;
+ outCount = 0;
+
+ emitState = BBStateNotEmitted;
+ }
};
struct InterpVar
{
CORINFO_CLASS_HANDLE clsHnd;
- InterpType mt;
+ InterpType interpType;
int indirects;
int offset;
int size;
// live_start and live_end are used by the offset allocator
int liveStart;
int liveEnd;
+ // index of first basic block where this var is used
+ int bbIndex;
+ // If var is callArgs, this is the call instruction using it.
+ // Only used by the var offset allocator
+ InterpInst *call;
+
+ unsigned int callArgs : 1; // Var used as argument to a call
+ unsigned int noCallArgs : 1; // Var can't be used as argument to a call, needs to be copied to temp
+ unsigned int global : 1; // Dedicated stack offset throughout method execution
+ unsigned int ILGlobal : 1; // Args and IL locals
+ unsigned int alive : 1; // Used internally by the var offset allocator
+
+ InterpVar(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size)
+ {
+ this->interpType = interpType;
+ this->clsHnd = clsHnd;
+ this->size = size;
+ offset = -1;
+ liveStart = -1;
+ bbIndex = -1;
+ indirects = 0;
+
+ callArgs = false;
+ noCallArgs = false;
+ global = false;
+ ILGlobal = false;
+ alive = false;
+ }
};
struct StackInfo
@@ -133,6 +222,38 @@ struct StackInfo
// The var associated with the value of this stack entry. Every time we push on
// the stack a new var is created.
int var;
+
+ StackInfo(StackType type)
+ {
+ this->type = type;
+ clsHnd = NULL;
+ size = 0;
+ var = -1;
+ }
+};
+
+enum RelocType
+{
+ RelocLongBranch,
+ RelocSwitch
+};
+
+struct Reloc
+{
+ RelocType type;
+ // For branch relocation, how many sVar slots to skip
+ int skip;
+ // Base offset that the relative offset to be embedded in IR applies to
+ int32_t offset;
+ InterpBasicBlock *pTargetBB;
+
+ Reloc(RelocType type, int32_t offset, InterpBasicBlock *pTargetBB, int skip)
+ {
+ this->type = type;
+ this->offset = offset;
+ this->pTargetBB = pTargetBB;
+ this->skip = skip;
+ }
};
typedef class ICorJitInfo* COMP_HANDLE;
@@ -141,14 +262,36 @@ class InterpCompiler
{
private:
CORINFO_METHOD_HANDLE m_methodHnd;
+ CORINFO_MODULE_HANDLE m_compScopeHnd;
COMP_HANDLE m_compHnd;
CORINFO_METHOD_INFO* m_methodInfo;
+ bool m_verbose;
+
+ static int32_t InterpGetMovForType(InterpType interpType, bool signExtend);
+
+ uint8_t* m_ip;
+ uint8_t* m_pILCode;
+ int32_t m_ILCodeSize;
+ int32_t m_currentILOffset;
+
+ // This represents a mapping from indexes to pointer sized data. During compilation, an
+ // instruction can request an index for some data (like a MethodDesc pointer), that it
+ // will then embed in the instruction stream. The data item table will be referenced
+ // from the interpreter code header during execution.
+ // FIXME during compilation this should be a hashtable for fast lookup of duplicates
+ TArray m_dataItems;
+ int32_t GetDataItemIndex(void* data);
+ int32_t GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle);
int GenerateCode(CORINFO_METHOD_INFO* methodInfo);
void* AllocMethodData(size_t numBytes);
+ // FIXME Mempool allocation currently leaks. We need to add an allocator and then
+    // free all memory when method is finished compiling.
void* AllocMemPool(size_t numBytes);
+ void* AllocMemPool0(size_t numBytes);
void* AllocTemporary(size_t numBytes);
+ void* AllocTemporary0(size_t numBytes);
void* ReallocTemporary(void* ptr, size_t numBytes);
void FreeTemporary(void* ptr);
@@ -175,23 +318,34 @@ class InterpCompiler
int m_BBCount = 0;
InterpBasicBlock** m_ppOffsetToBB;
- InterpBasicBlock* AllocBB();
+ InterpBasicBlock* AllocBB(int32_t ilOffset);
InterpBasicBlock* GetBB(int32_t ilOffset);
void LinkBBs(InterpBasicBlock *from, InterpBasicBlock *to);
void UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to);
+ void EmitBranch(InterpOpcode opcode, int ilOffset);
+ void EmitOneArgBranch(InterpOpcode opcode, int ilOffset, int insSize);
+ void EmitTwoArgBranch(InterpOpcode opcode, int ilOffset, int insSize);
+
+ void EmitBBEndVarMoves(InterpBasicBlock *pTargetBB);
+ void InitBBStackState(InterpBasicBlock *pBB);
+ void UnlinkUnreachableBBlocks();
+
// Vars
InterpVar *m_pVars = NULL;
int32_t m_varsSize = 0;
int32_t m_varsCapacity = 0;
- int32_t CreateVarExplicit(InterpType mt, CORINFO_CLASS_HANDLE clsHnd, int size);
+ int32_t CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size);
- int32_t m_totalVarsStackSize = 0;
+ int32_t m_totalVarsStackSize;
int32_t m_paramAreaOffset = 0;
+ int32_t m_ILLocalsOffset, m_ILLocalsSize;
void AllocVarOffsetCB(int *pVar, void *pData);
int32_t AllocVarOffset(int var, int32_t *pPos);
+ int32_t GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign);
+ void CreateILVars();
// Stack
StackInfo *m_pStackPointer, *m_pStackBase;
@@ -201,21 +355,59 @@ class InterpCompiler
bool CheckStackHelper(int n);
void EnsureStack(int additional);
void PushTypeExplicit(StackType stackType, CORINFO_CLASS_HANDLE clsHnd, int size);
- void PushType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd);
+ void PushStackType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd);
+ void PushInterpType(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd);
void PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size);
+ // Code emit
+ void EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp);
+ void EmitLoadVar(int var);
+ void EmitStoreVar(int var);
+ void EmitBinaryArithmeticOp(int32_t opBase);
+ void EmitUnaryArithmeticOp(int32_t opBase);
+ void EmitShiftOp(int32_t opBase);
+ void EmitCompareOp(int32_t opBase);
+ void EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall);
+ bool EmitCallIntrinsics(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO sig);
+
+ // Var Offset allocator
+ TArray *m_pActiveCalls;
+ TArray *m_pActiveVars;
+ TSList *m_pDeferredCalls;
+
+ int32_t AllocGlobalVarOffset(int var);
+ void SetVarLiveRange(int32_t var, int insIndex);
+ void SetVarLiveRangeCB(int32_t *pVar, void *pData);
+ void InitializeGlobalVar(int32_t var, int bbIndex);
+ void InitializeGlobalVarCB(int32_t *pVar, void *pData);
+ void InitializeGlobalVars();
+ void EndActiveCall(InterpInst *call);
+ void CompactActiveVars(int32_t *current_offset);
+
// Passes
int32_t* m_pMethodCode;
- int32_t m_MethodCodeSize; // in int32_t
+ int32_t m_methodCodeSize; // code size measured in int32_t slots, instead of bytes
void AllocOffsets();
int32_t ComputeCodeSize();
void EmitCode();
- int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns);
+ int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns, TArray *relocs);
+ void PatchRelocations(TArray *relocs);
InterpMethod* CreateInterpMethod();
+ bool CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo);
+
+ // Debug
+ void PrintClassName(CORINFO_CLASS_HANDLE cls);
+ void PrintMethodName(CORINFO_METHOD_HANDLE method);
+ void PrintCode();
+ void PrintBBCode(InterpBasicBlock *pBB);
+ void PrintIns(InterpInst *ins);
+ void PrintInsData(InterpInst *ins, int32_t offset, const int32_t *pData, int32_t opcode);
+ void PrintCompiledCode();
+ void PrintCompiledIns(const int32_t *ip, const int32_t *start);
public:
- InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo);
+ InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo, bool verbose);
InterpMethod* CompileMethod();
diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp
index 9e1f83ac34a1ed..cc188d53101a57 100644
--- a/src/coreclr/interpreter/compileropt.cpp
+++ b/src/coreclr/interpreter/compileropt.cpp
@@ -18,23 +18,386 @@ int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos)
return m_pVars[var].offset;
}
-void InterpCompiler::AllocVarOffsetCB(int *pVar, void *pData)
+// Global vars are variables that are referenced from multiple basic blocks. We reserve
+// a dedicated slot for each such variable.
+int32_t InterpCompiler::AllocGlobalVarOffset(int var)
{
- AllocVarOffset(*pVar, &m_totalVarsStackSize);
+ return AllocVarOffset(var, &m_totalVarsStackSize);
+}
+
+// For a var that is local to the current bblock that we process, as we iterate
+// over instructions we mark the first and last instruction using it.
+void InterpCompiler::SetVarLiveRange(int32_t var, int insIndex)
+{
+ // We don't track liveness yet for global vars
+ if (m_pVars[var].global)
+ return;
+ if (m_pVars[var].liveStart == -1)
+ m_pVars[var].liveStart = insIndex;
+ m_pVars[var].liveEnd = insIndex;
+}
+
+void InterpCompiler::SetVarLiveRangeCB(int32_t *pVar, void *pData)
+{
+ SetVarLiveRange(*pVar, (int)(size_t)pData);
+}
+
+void InterpCompiler::InitializeGlobalVar(int32_t var, int bbIndex)
+{
+ // Check if already handled
+ if (m_pVars[var].global)
+ return;
+
+ if (m_pVars[var].bbIndex == -1)
+ {
+ m_pVars[var].bbIndex = bbIndex;
+ }
+ else if (m_pVars[var].bbIndex != bbIndex)
+ {
+ AllocGlobalVarOffset(var);
+ m_pVars[var].global = true;
+ INTERP_DUMP("alloc global var %d to offset %d\n", var, m_pVars[var].offset);
+ }
+}
+
+void InterpCompiler::InitializeGlobalVarCB(int32_t *pVar, void *pData)
+{
+ InitializeGlobalVar(*pVar, (int)(size_t)pData);
+}
+
+void InterpCompiler::InitializeGlobalVars()
+{
+ InterpBasicBlock *pBB;
+ for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB)
+ {
+ InterpInst *pIns;
+
+ for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) {
+
+ int32_t opcode = pIns->opcode;
+ if (opcode == INTOP_NOP)
+ continue;
+ if (opcode == INTOP_LDLOCA)
+ {
+ int var = pIns->sVars[0];
+ // If global flag is set, it means its offset was already allocated
+ if (!m_pVars[var].global)
+ {
+ AllocGlobalVarOffset(var);
+ m_pVars[var].global = true;
+ INTERP_DUMP("alloc global var %d to offset %d\n", var, m_pVars[var].offset);
+ }
+ }
+ ForEachInsVar(pIns, (void*)(size_t)pBB->index, &InterpCompiler::InitializeGlobalVarCB);
+ }
+ }
+ m_totalVarsStackSize = ALIGN_UP_TO(m_totalVarsStackSize, INTERP_STACK_ALIGNMENT);
+}
+
+// In the final codegen, each call instruction will receive a single offset as an argument. At this
+// offset all the call arguments will be located. This offset will point into the param area. Vars
+// allocated here have special constraints compared to normal local/global vars.
+//
+// For each call instruction, this method computes its args offset. The call offset is computed as
+// the max offset of all call offsets on which the call depends. Stack ensures that all call offsets
+// on which the call depends are calculated before the call in question, by deferring calls from the
+// last to the first one.
+//
+// This method allocates offsets of resolved calls following a constraint where the base offset
+// of a call must be greater than the offset of any argument of other active call args. It first
+// removes the call from an array of active calls. If a match is found, the call is removed from
+// the array by moving the last entry into its place. Otherwise, it is a call without arguments.
+//
+// If there are active calls, the call in question is pushed onto the stack as a deferred call.
+// The call contains a list of other active calls on which it depends. Those calls need to be
+// resolved first in order to determine optimal base offset for the call in question. Otherwise,
+// if there are no active calls, we resolve the call in question and deferred calls from the stack.
+//
+// For better understanding, consider a simple example:
+// a <- _
+// b <- _
+// call1 c <- b
+// d <- _
+// call2 _ <- a c d
+//
+// When `a` is defined, call2 becomes an active call, since `a` is part of call2 arguments.
+// When `b` is defined, call1 also becomes an active call,
+// When reaching call1, we attempt to resolve it. The problem with this is that call2 is already
+// active, and all arguments of call1 should be placed after any arguments of call2 (in this example
+// it would be enough for them to be placed after `a`, but for simplicity we place them after all
+// arguments, so after `d` offset). Given call1 offset depends on call2 offset, we initialize its
+// callDeps (to call2) and add call1 to the set of currently deferred calls. Call1 is no longer an
+// active call at this point.
+// When reaching call2, we see we have no remaining active calls, so we will resolve its offset.
+// Once the offset is resolved, we continue to resolve each remaining call from the deferred list.
+// Processing call1, we iterate over each call dependency (in our case just call2) and allocate its
+// offset accordingly so it doesn't overlap with any call2 args offsets.
+void InterpCompiler::EndActiveCall(InterpInst *call)
+{
+ // Remove call from array
+ m_pActiveCalls->Remove(call);
+
+ // Push active call that should be resolved onto the stack
+ if (m_pActiveCalls->GetSize())
+ {
+ TSList *callDeps = NULL;
+ for (int i = 0; i < m_pActiveCalls->GetSize(); i++)
+ callDeps = TSList::Push(callDeps, m_pActiveCalls->Get(i));
+ call->info.pCallInfo->callDeps = callDeps;
+
+ m_pDeferredCalls = TSList::Push(m_pDeferredCalls, call);
+ }
+ else
+ {
+ call->info.pCallInfo->callDeps = NULL;
+ // If no other active calls, current active call and all deferred calls can be resolved from the stack
+ InterpInst *deferredCall = call;
+ while (deferredCall) {
+ // `base_offset` is a relative offset (to the start of the call args stack) where the args for this
+ // call reside. The deps for a call represent the list of active calls at the moment when the call ends.
+ // This means that all deps for a call end after the call in question. Given we iterate over the list
+ // of deferred calls from the last to the first one to end, all deps of a call are guaranteed to have
+ // been processed at this point.
+ int32_t baseOffset = 0;
+ for (TSList *list = deferredCall->info.pCallInfo->callDeps; list; list = list->pNext)
+ {
+ int32_t endOffset = list->data->info.pCallInfo->callEndOffset;
+ if (endOffset > baseOffset)
+ baseOffset = endOffset;
+ }
+ deferredCall->info.pCallInfo->callOffset = baseOffset;
+ // Compute the offset of each call argument
+ int32_t *callArgs = deferredCall->info.pCallInfo->pCallArgs;
+ if (callArgs && (*callArgs != -1))
+ {
+ int32_t var = *callArgs;
+ while (var != CALL_ARGS_TERMINATOR)
+ {
+ AllocVarOffset(var, &baseOffset);
+ callArgs++;
+ var = *callArgs;
+ }
+ }
+ deferredCall->info.pCallInfo->callEndOffset = ALIGN_UP_TO(baseOffset, INTERP_STACK_ALIGNMENT);
+
+ if (m_pDeferredCalls)
+ {
+ deferredCall = m_pDeferredCalls->data;
+ m_pDeferredCalls = TSList::Pop(m_pDeferredCalls);
+ }
+ else
+ {
+ deferredCall = NULL;
+ }
+ }
+ }
+}
+
+// Remove dead vars from the end of the active vars array and update the current offset
+// to point immediately after the first found alive var. The space that used to belong
+// to the now dead vars will be reused for future defined local vars in the same bblock.
+void InterpCompiler::CompactActiveVars(int32_t *pCurrentOffset)
+{
+ int32_t size = m_pActiveVars->GetSize();
+ if (!size)
+ return;
+ int32_t i = size - 1;
+ while (i >= 0)
+ {
+ int32_t var = m_pActiveVars->Get(i);
+ // If var is alive we can't compact anymore
+ if (m_pVars[var].alive)
+ return;
+ *pCurrentOffset = m_pVars[var].offset;
+ m_pActiveVars->RemoveAt(i);
+ i--;
+ }
}
void InterpCompiler::AllocOffsets()
{
- // FIXME add proper offset allocator
InterpBasicBlock *pBB;
+ m_pActiveVars = new TArray();
+ m_pActiveCalls = new TArray();
+ m_pDeferredCalls = NULL;
+
+ InitializeGlobalVars();
+
+ INTERP_DUMP("\nAllocating var offsets\n");
+
+ int finalVarsStackSize = m_totalVarsStackSize;
+ // We now have the top of stack offset. All local regs are allocated after this offset, with each basic block
for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB)
{
InterpInst *pIns;
+ int insIndex = 0;
+
+ INTERP_DUMP("BB%d\n", pBB->index);
+
+ // All data structs should be left empty after a bblock iteration
+ assert(m_pActiveVars->GetSize() == 0);
+ assert(m_pActiveCalls->GetSize() == 0);
+ assert(m_pDeferredCalls == NULL);
for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext)
- ForEachInsSVar(pIns, NULL, &InterpCompiler::AllocVarOffsetCB);
+ {
+ if (pIns->opcode == INTOP_NOP)
+ continue;
+
+ // TODO NewObj will be marked as noCallArgs
+ if (pIns->flags & INTERP_INST_FLAG_CALL)
+ {
+ if (pIns->info.pCallInfo && pIns->info.pCallInfo->pCallArgs)
+ {
+ int32_t *callArgs = pIns->info.pCallInfo->pCallArgs;
+ int32_t var = *callArgs;
+
+ while (var != -1)
+ {
+ if (m_pVars[var].global || m_pVars[var].noCallArgs)
+ {
+ // Some vars can't be allocated on the call args stack, since the constraint is that
+ // call args vars die after the call. This isn't necessarily true for global vars or
+ // vars that are used by other instructions aside from the call.
+ // We need to copy the var into a new tmp var
+ int newVar = CreateVarExplicit(m_pVars[var].interpType, m_pVars[var].clsHnd, m_pVars[var].size);
+ m_pVars[newVar].call = pIns;
+ m_pVars[newVar].callArgs = true;
+
+ int32_t opcode = InterpGetMovForType(m_pVars[newVar].interpType, false);
+ InterpInst *newInst = InsertInsBB(pBB, pIns->pPrev, opcode);
+ newInst->SetDVar(newVar);
+ newInst->SetSVar(newVar);
+ if (opcode == INTOP_MOV_VT)
+ newInst->data[0] = m_pVars[var].size;
+ // The arg of the call is no longer global
+ *callArgs = newVar;
+ // Also update liveness for this instruction
+ ForEachInsVar(newInst, (void*)(size_t)insIndex, &InterpCompiler::SetVarLiveRangeCB);
+ insIndex++;
+ }
+ else
+ {
+ // Flag this var as it has special storage on the call args stack
+ m_pVars[var].call = pIns;
+ m_pVars[var].callArgs = true;
+ }
+ callArgs++;
+ var = *callArgs;
+ }
+ }
+ }
+ // Set liveStart and liveEnd for every referenced local that is not global
+ ForEachInsVar(pIns, (void*)(size_t)insIndex, &InterpCompiler::SetVarLiveRangeCB);
+ insIndex++;
+ }
+ int32_t currentOffset = m_totalVarsStackSize;
+
+ insIndex = 0;
+ for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) {
+ int32_t opcode = pIns->opcode;
+ bool isCall = pIns->flags & INTERP_INST_FLAG_CALL;
+
+ if (opcode == INTOP_NOP)
+ continue;
+
+#ifdef DEBUG
+ if (m_verbose)
+ {
+ printf("\tins_index %d\t", insIndex);
+ PrintIns(pIns);
+ }
+#endif
+
+ // Expire source vars. We first mark them as not alive and then compact the array
+ for (int i = 0; i < g_interpOpSVars[opcode]; i++)
+ {
+ int32_t var = pIns->sVars[i];
+ if (var == CALL_ARGS_SVAR)
+ continue;
+ if (!m_pVars[var].global && m_pVars[var].liveEnd == insIndex)
+ {
+ // Mark the var as no longer being alive
+ assert(!m_pVars[var].callArgs);
+ m_pVars[var].alive = false;
+ }
+ }
+
+ if (isCall)
+ EndActiveCall(pIns);
+
+ CompactActiveVars(&currentOffset);
+
+ // Alloc dreg local starting at the stack_offset
+ if (g_interpOpDVars[opcode])
+ {
+ int32_t var = pIns->dVar;
+
+ if (m_pVars[var].callArgs)
+ {
+ InterpInst *call = m_pVars[var].call;
+ // Check if already added
+ if (!(call->flags & INTERP_INST_FLAG_ACTIVE_CALL))
+ {
+ m_pActiveCalls->Add(call);
+ // Mark a flag on it so we don't have to lookup the array with every argument store.
+ call->flags |= INTERP_INST_FLAG_ACTIVE_CALL;
+ }
+ }
+ else if (!m_pVars[var].global && m_pVars[var].offset == -1)
+ {
+ AllocVarOffset(var, &currentOffset);
+ INTERP_DUMP("alloc var %d to offset %d\n", var, m_pVars[var].offset);
+
+ if (currentOffset > finalVarsStackSize)
+ finalVarsStackSize = currentOffset;
+
+ if (m_pVars[var].liveEnd > insIndex)
+ {
+ // If dVar is still used in the basic block, add it to the active list
+ m_pActiveVars->Add(var);
+ m_pVars[var].alive = true;
+ }
+ else
+ {
+ // Otherwise dealloc it
+ currentOffset = m_pVars[var].offset;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (m_verbose)
+ {
+ printf("active vars:");
+ for (int i = 0; i < m_pActiveVars->GetSize(); i++)
+ {
+ int32_t var = m_pActiveVars->Get(i);
+ if (m_pVars[var].alive)
+ printf(" %d (end %d),", var, m_pVars[var].liveEnd);
+ }
+ printf("\n");
+ }
+#endif
+ insIndex++;
+ }
}
- m_totalVarsStackSize = ALIGN_UP_TO(m_totalVarsStackSize, INTERP_STACK_ALIGNMENT);
- m_paramAreaOffset = m_totalVarsStackSize;
+ finalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT);
+
+ // Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them)
+ // then also update td->total_locals_size to account for this space.
+ m_paramAreaOffset = finalVarsStackSize;
+ for (int32_t i = 0; i < m_varsSize; i++)
+ {
+ // These are allocated separately at the end of the stack
+ if (m_pVars[i].callArgs)
+ {
+ m_pVars[i].offset += m_paramAreaOffset;
+ int32_t topOffset = m_pVars[i].offset + m_pVars[i].size;
+ if (finalVarsStackSize < topOffset)
+ finalVarsStackSize = topOffset;
+ }
+ }
+ m_totalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT);
}
diff --git a/src/coreclr/interpreter/datastructs.h b/src/coreclr/interpreter/datastructs.h
new file mode 100644
index 00000000000000..14d7f376e8fb5a
--- /dev/null
+++ b/src/coreclr/interpreter/datastructs.h
@@ -0,0 +1,128 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef _DATASTRUCTS_H_
+#define _DATASTRUCTS_H_
+
+template <typename T>
+class TArray
+{
+private:
+ int32_t m_size, m_capacity;
+ T *m_array;
+
+ void Grow()
+ {
+ if (m_capacity)
+ m_capacity *= 2;
+ else
+ m_capacity = 16;
+
+ m_array = (T*)realloc(m_array, m_capacity * sizeof(T));
+ }
+public:
+ TArray()
+ {
+ m_size = 0;
+ m_capacity = 0;
+ m_array = NULL;
+ }
+
+ ~TArray()
+ {
+ if (m_capacity > 0)
+ free(m_array);
+ }
+
+ int32_t GetSize()
+ {
+ return m_size;
+ }
+
+ int32_t Add(T element)
+ {
+ if (m_size == m_capacity)
+ Grow();
+ m_array[m_size] = element;
+ return m_size++;
+ }
+
+ T Get(int32_t index)
+ {
+ assert(index < m_size);
+ return m_array[index];
+ }
+
+ int32_t Find(T element)
+ {
+ for (int i = 0; i < m_size; i++)
+ {
+ if (element == m_array[i])
+ return i;
+ }
+ return -1;
+ }
+
+ // Assumes elements are unique
+ void RemoveAt(int32_t index)
+ {
+ assert(index < m_size);
+ m_size--;
+ // Since this entry is removed, move the last entry into it
+ if (m_size > 0 && index < m_size)
+ m_array[index] = m_array[m_size];
+ }
+
+ // Assumes elements are unique
+ void Remove(T element)
+ {
+ for (int32_t i = 0; i < m_size; i++)
+ {
+ if (element == m_array[i])
+ {
+ RemoveAt(i);
+ break;
+ }
+ }
+ }
+
+ void Clear()
+ {
+ m_size = 0;
+ }
+};
+
+// Singly linked list, implemented as a stack
+template <typename T>
+struct TSList
+{
+ T data;
+ TSList *pNext;
+
+ TSList(T data, TSList *pNext)
+ {
+ this->data = data;
+ this->pNext = pNext;
+ }
+
+ static TSList* Push(TSList *head, T data)
+ {
+ TSList *newHead = new TSList(data, head);
+ return newHead;
+ }
+
+ static TSList* Pop(TSList *head)
+ {
+ TSList *next = head->pNext;
+ delete head;
+ return next;
+ }
+
+ static void Free(TSList *head)
+ {
+ while (head != NULL)
+ head = Pop(head);
+ }
+};
+
+#endif
diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp
index 8af5a051381e6a..7d7960e5ebc88a 100644
--- a/src/coreclr/interpreter/eeinterp.cpp
+++ b/src/coreclr/interpreter/eeinterp.cpp
@@ -40,6 +40,9 @@ extern "C" INTERP_API ICorJitCompiler* getJit()
return &g_CILInterp;
}
+
+static CORINFO_MODULE_HANDLE g_interpModule = NULL;
+
//****************************************************************************
CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd,
CORINFO_METHOD_INFO* methodInfo,
@@ -48,20 +51,34 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd,
uint32_t* nativeSizeOfCode)
{
- const char *methodName = compHnd->getMethodNameFromMetadata(methodInfo->ftn, nullptr, nullptr, nullptr, 0);
+ bool doInterpret;
- // TODO: replace this by something like the JIT does to support multiple methods being specified and we don't
- // keep fetching it on each call to compileMethod
- const char *methodToInterpret = g_interpHost->getStringConfigValue("AltJit");
- bool doInterpret = (methodName != NULL && strcmp(methodName, methodToInterpret) == 0);
- g_interpHost->freeStringConfigValue(methodToInterpret);
+ if (g_interpModule != NULL)
+ {
+ if (methodInfo->scope == g_interpModule)
+ doInterpret = true;
+ else
+ doInterpret = false;
+ }
+ else
+ {
+ const char *methodName = compHnd->getMethodNameFromMetadata(methodInfo->ftn, nullptr, nullptr, nullptr, 0);
+
+ // TODO: replace this by something like the JIT does to support multiple methods being specified and we don't
+ // keep fetching it on each call to compileMethod
+ const char *methodToInterpret = g_interpHost->getStringConfigValue("AltJit");
+ doInterpret = (methodName != NULL && strcmp(methodName, methodToInterpret) == 0);
+ g_interpHost->freeStringConfigValue(methodToInterpret);
+ if (doInterpret)
+ g_interpModule = methodInfo->scope;
+ }
if (!doInterpret)
{
return CORJIT_SKIPPED;
}
- InterpCompiler compiler(compHnd, methodInfo);
+ InterpCompiler compiler(compHnd, methodInfo, false /* verbose */);
InterpMethod *pMethod = compiler.CompileMethod();
int32_t IRCodeSize;
diff --git a/src/coreclr/interpreter/interpretershared.h b/src/coreclr/interpreter/interpretershared.h
index f977aac32b8dbc..5e8928b840bafd 100644
--- a/src/coreclr/interpreter/interpretershared.h
+++ b/src/coreclr/interpreter/interpretershared.h
@@ -6,26 +6,24 @@
#ifndef _INTERPRETERSHARED_H_
#define _INTERPRETERSHARED_H_
+#include "intopsshared.h"
+
#define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack
#define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame
-#define OPDEF(a,b,c,d,e,f) a,
-typedef enum
-{
-#include "intops.def"
- INTOP_LAST
-} InterpOpcode;
-#undef OPDEF
+#define INTERP_METHOD_DESC_TAG 4 // Tag of a MethodDesc in the interp method dataItems
struct InterpMethod
{
CORINFO_METHOD_HANDLE methodHnd;
int32_t allocaSize;
+ void** pDataItems;
- InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize)
+ InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems)
{
this->methodHnd = methodHnd;
this->allocaSize = allocaSize;
+ this->pDataItems = pDataItems;
}
};
diff --git a/src/coreclr/interpreter/intops.cpp b/src/coreclr/interpreter/intops.cpp
index 7f94eb4a363c2a..94f259d14d10bc 100644
--- a/src/coreclr/interpreter/intops.cpp
+++ b/src/coreclr/interpreter/intops.cpp
@@ -4,6 +4,7 @@
#include "intops.h"
#include
+#include
// This, instead of an array of pointers, to optimize away a pointer and a relocation per string.
struct InterpOpNameCharacters
@@ -49,9 +50,15 @@ const InterpOpArgType g_interpOpArgType[] = {
#undef OPDEF
};
-const uint8_t* InterpNextOp(const uint8_t *ip)
+const int32_t* InterpNextOp(const int32_t *ip)
{
int len = g_interpOpLen[*ip];
+ if (len == 0)
+ {
+ assert(*ip == INTOP_SWITCH);
+ len = 3 + ip[2];
+ }
+
return ip + len;
}
@@ -60,3 +67,105 @@ const char* InterpOpName(int op)
return ((const char*)&g_interpOpNameCharacters) + g_interpOpNameOffsets[op];
}
+// Information about IL opcodes
+
+OPCODE_FORMAT const g_CEEOpArgs[] = {
+#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) args,
+#include "opcode.def"
+#undef OPDEF
+};
+
+struct CEEOpNameCharacters
+{
+#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) char c[sizeof(s)];
+#include "opcode.def"
+#undef OPDEF
+};
+
+const struct CEEOpNameCharacters g_CEEOpNameCharacters = {
+#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) s,
+#include "opcode.def"
+#undef OPDEF
+};
+
+const uint32_t g_CEEOpNameOffsets[] = {
+#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) offsetof(CEEOpNameCharacters, c),
+#include "opcode.def"
+#undef OPDEF
+};
+
+const char* CEEOpName(OPCODE op)
+{
+ return ((const char*)&g_CEEOpNameCharacters) + g_CEEOpNameOffsets[op];
+}
+
+// Also updates ip to skip over prefix, if any
+OPCODE CEEDecodeOpcode(const uint8_t **pIp)
+{
+ OPCODE res;
+ const uint8_t *ip = *pIp;
+
+ if (*ip == 0xFE)
+ {
+ // Double byte encoding, offset
+ ip++;
+ res = (OPCODE)(*ip + CEE_ARGLIST);
+ }
+ else
+ {
+ res = (OPCODE)*ip;
+ }
+ *pIp = ip;
+ return res;
+}
+
+int32_t CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd)
+{
+ const uint8_t *p = ip;
+ OPCODE opcode = CEEDecodeOpcode(&p);
+ OPCODE_FORMAT opArgs = g_CEEOpArgs[opcode];
+
+ size_t size = 0;
+
+ switch (opArgs)
+ {
+ case InlineNone:
+ size = 1;
+ break;
+ case InlineString:
+ case InlineType:
+ case InlineField:
+ case InlineMethod:
+ case InlineTok:
+ case InlineSig:
+ case ShortInlineR:
+ case InlineI:
+ case InlineBrTarget:
+ size = 5;
+ break;
+ case InlineVar:
+ size = 3;
+ break;
+ case ShortInlineVar:
+ case ShortInlineI:
+ case ShortInlineBrTarget:
+ size = 2;
+ break;
+ case InlineR:
+ case InlineI8:
+ size = 9;
+ break;
+ case InlineSwitch: {
+ size_t entries = getI4LittleEndian(p + 1);
+ size = 5 + 4 * entries;
+ break;
+ }
+ default:
+ assert(0);
+ }
+
+ if ((ip + size) >= codeEnd)
+ return -1;
+
+ return (int32_t)((p - ip) + size);
+}
diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def
index 78cbf4909e74f7..fcdf33aa11575c 100644
--- a/src/coreclr/interpreter/intops.def
+++ b/src/coreclr/interpreter/intops.def
@@ -8,6 +8,190 @@
OPDEF(INTOP_NOP, "nop", 1, 0, 0, InterpOpNoArgs)
OPDEF(INTOP_RET, "ret", 2, 0, 1, InterpOpNoArgs)
+OPDEF(INTOP_RET_VT, "ret.vt", 3, 0, 1, InterpOpInt)
OPDEF(INTOP_RET_VOID, "ret.void", 1, 0, 0, InterpOpNoArgs)
OPDEF(INTOP_LDC_I4, "ldc.i4", 3, 1, 0, InterpOpInt)
+OPDEF(INTOP_LDC_I4_0, "ldc.i4.0", 2, 1, 0, InterpOpNoArgs)
+OPDEF(INTOP_LDC_I8_0, "ldc.i8.0", 2, 1, 0, InterpOpNoArgs)
+
+OPDEF(INTOP_MOV_I4_I1, "mov.i4.i1", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_MOV_I4_U1, "mov.i4.u1", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_MOV_I4_I2, "mov.i4.i2", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_MOV_I4_U2, "mov.i4.u2", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_MOV_4, "mov.4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_MOV_8, "mov.8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_MOV_VT, "mov.vt", 4, 1, 1, InterpOpInt)
+
+OPDEF(INTOP_LDLOCA, "ldloca", 3, 1, 0, InterpOpInt)
+
+OPDEF(INTOP_SWITCH, "switch", 0, 0, 1, InterpOpSwitch)
+
+OPDEF(INTOP_BR, "br", 2, 0, 0, InterpOpBranch)
+
+OPDEF(INTOP_BRFALSE_I4, "brfalse.i4", 3, 0, 1, InterpOpBranch)
+OPDEF(INTOP_BRFALSE_I8, "brfalse.i8", 3, 0, 1, InterpOpBranch)
+OPDEF(INTOP_BRTRUE_I4, "brtrue.i4", 3, 0, 1, InterpOpBranch)
+OPDEF(INTOP_BRTRUE_I8, "brtrue.i8", 3, 0, 1, InterpOpBranch)
+
+OPDEF(INTOP_BEQ_I4, "beq.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BEQ_I8, "beq.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BEQ_R4, "beq.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BEQ_R8, "beq.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_I4, "bge.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_I8, "bge.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_R4, "bge.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_R8, "bge.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_I4, "bgt.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_I8, "bgt.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_R4, "bgt.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_R8, "bgt.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_I4, "blt.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_I8, "blt.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_R4, "blt.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_R8, "blt.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_I4, "ble.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_I8, "ble.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_R4, "ble.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_R8, "ble.r8", 4, 0, 2, InterpOpBranch)
+
+OPDEF(INTOP_BNE_UN_I4, "bne.un.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BNE_UN_I8, "bne.un.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BNE_UN_R4, "bne.un.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BNE_UN_R8, "bne.un.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_UN_I4, "bge.un.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_UN_I8, "bge.un.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_UN_R4, "bge.un.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGE_UN_R8, "bge.un.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_UN_I4, "bgt.un.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_UN_I8, "bgt.un.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_UN_R4, "bgt.un.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BGT_UN_R8, "bgt.un.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_UN_I4, "ble.un.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_UN_I8, "ble.un.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_UN_R4, "ble.un.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLE_UN_R8, "ble.un.r8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_UN_I4, "blt.un.i4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_UN_I8, "blt.un.i8", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_UN_R4, "blt.un.r4", 4, 0, 2, InterpOpBranch)
+OPDEF(INTOP_BLT_UN_R8, "blt.un.r8", 4, 0, 2, InterpOpBranch)
+
+// Unary operations
+
+OPDEF(INTOP_NEG_I4, "neg.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_NEG_I8, "neg.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_NEG_R4, "neg.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_NEG_R8, "neg.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_NOT_I4, "not.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_NOT_I8, "not.i8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_R_UN_I4, "conv.r.un.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_R_UN_I8, "conv.r.un.i8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_I1_I4, "conv.i1.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I1_I8, "conv.i1.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I1_R4, "conv.i1.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I1_R8, "conv.i1.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_U1_I4, "conv.u1.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U1_I8, "conv.u1.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U1_R4, "conv.u1.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U1_R8, "conv.u1.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_I2_I4, "conv.i2.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I2_I8, "conv.i2.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I2_R4, "conv.i2.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I2_R8, "conv.i2.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_U2_I4, "conv.u2.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U2_I8, "conv.u2.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U2_R4, "conv.u2.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U2_R8, "conv.u2.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_I4_R4, "conv.i4.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I4_R8, "conv.i4.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_U4_R4, "conv.u4.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U4_R8, "conv.u4.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_I8_I4, "conv.i8.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I8_U4, "conv.i8.u4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I8_R4, "conv.i8.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_I8_R8, "conv.i8.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_R4_I4, "conv.r4.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_R4_I8, "conv.r4.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_R4_R8, "conv.r4.r8", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_R8_I4, "conv.r8.i4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_R8_I8, "conv.r8.i8", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_R8_R4, "conv.r8.r4", 3, 1, 1, InterpOpNoArgs)
+
+OPDEF(INTOP_CONV_U8_R4, "conv.u8.r4", 3, 1, 1, InterpOpNoArgs)
+OPDEF(INTOP_CONV_U8_R8, "conv.u8.r8", 3, 1, 1, InterpOpNoArgs)
+// Unary operations end
+
+// Binary operations
+
+OPDEF(INTOP_ADD_I4, "add.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_ADD_I8, "add.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_ADD_R4, "add.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_ADD_R8, "add.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_SUB_I4, "sub.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SUB_I8, "sub.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SUB_R4, "sub.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SUB_R8, "sub.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_MUL_I4, "mul.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_MUL_I8, "mul.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_MUL_R4, "mul.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_MUL_R8, "mul.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_AND_I4, "and.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_AND_I8, "and.i8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_OR_I4, "or.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_OR_I8, "or.i8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_XOR_I4, "xor.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_XOR_I8, "xor.i8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_SHR_UN_I4, "shr.un.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SHR_UN_I8, "shr.un.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SHL_I4, "shl.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SHL_I8, "shl.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SHR_I4, "shr.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_SHR_I8, "shr.i8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_CEQ_I4, "ceq.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CEQ_I8, "ceq.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CEQ_R4, "ceq.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CEQ_R8, "ceq.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_CGT_I4, "cgt.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CGT_I8, "cgt.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CGT_R4, "cgt.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CGT_R8, "cgt.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_CGT_UN_I4, "cgt.un.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CGT_UN_I8, "cgt.un.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CGT_UN_R4, "cgt.un.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CGT_UN_R8, "cgt.un.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_CLT_I4, "clt.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CLT_I8, "clt.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CLT_R4, "clt.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CLT_R8, "clt.r8", 4, 1, 2, InterpOpNoArgs)
+
+OPDEF(INTOP_CLT_UN_I4, "clt.un.i4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CLT_UN_I8, "clt.un.i8", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CLT_UN_R4, "clt.un.r4", 4, 1, 2, InterpOpNoArgs)
+OPDEF(INTOP_CLT_UN_R8, "clt.un.r8", 4, 1, 2, InterpOpNoArgs)
+// Binary operations end
+
+// Calls
+OPDEF(INTOP_CALL, "call", 4, 1, 1, InterpOpMethodToken)
+
+OPDEF(INTOP_FAILFAST, "failfast", 1, 0, 0, InterpOpNoArgs)
diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h
index d058602508f50d..fa64b28a971974 100644
--- a/src/coreclr/interpreter/intops.h
+++ b/src/coreclr/interpreter/intops.h
@@ -4,19 +4,25 @@
#ifndef _INTOPS_H
#define _INTOPS_H
+#include "openum.h"
#include
+#include "intopsshared.h"
+
typedef enum
{
InterpOpNoArgs,
InterpOpInt,
+ InterpOpBranch,
+ InterpOpSwitch,
+ InterpOpMethodToken,
} InterpOpArgType;
extern const uint8_t g_interpOpLen[];
extern const int g_interpOpDVars[];
extern const int g_interpOpSVars[];
extern const InterpOpArgType g_interpOpArgType[];
-extern const uint8_t* InterpNextOp(const uint8_t* ip);
+extern const int32_t* InterpNextOp(const int32_t* ip);
// This, instead of an array of pointers, to optimize away a pointer and a relocation per string.
extern const uint32_t g_interpOpNameOffsets[];
@@ -25,4 +31,65 @@ extern const InterpOpNameCharacters g_interpOpNameCharacters;
const char* InterpOpName(int op);
+extern OPCODE_FORMAT const g_CEEOpArgs[];
+const char* CEEOpName(OPCODE op);
+OPCODE CEEDecodeOpcode(const uint8_t **ip);
+int CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd);
+
+#ifdef TARGET_64BIT
+#define INTOP_MOV_P INTOP_MOV_8
+#define INTOP_LDNULL INTOP_LDC_I8_0
+#else
+#define INTOP_MOV_P INTOP_MOV_4
+#define INTOP_LDNULL INTOP_LDC_I4_0
+#endif
+
+static inline bool InterpOpIsUncondBranch(int32_t opcode)
+{
+ return opcode == INTOP_BR;
+}
+
+static inline bool InterpOpIsCondBranch(int32_t opcode)
+{
+ return opcode >= INTOP_BRFALSE_I4 && opcode <= INTOP_BLT_UN_R8;
+}
+
+// Helpers for reading data from uint8_t code stream
+inline uint16_t getU2LittleEndian(const uint8_t* ptr)
+{
+ return *ptr | *(ptr + 1) << 8;
+}
+
+inline uint32_t getU4LittleEndian(const uint8_t* ptr)
+{
+ return *ptr | *(ptr + 1) << 8 | *(ptr + 2) << 16 | *(ptr + 3) << 24;
+}
+
+inline int16_t getI2LittleEndian(const uint8_t* ptr)
+{
+ return (int16_t)getU2LittleEndian(ptr);
+}
+
+inline int32_t getI4LittleEndian(const uint8_t* ptr)
+{
+ return (int32_t)getU4LittleEndian(ptr);
+}
+
+inline int64_t getI8LittleEndian(const uint8_t* ptr)
+{
+ return (int64_t)getI4LittleEndian(ptr) | ((int64_t)getI4LittleEndian(ptr + 4)) << 32;
+}
+
+inline float getR4LittleEndian(const uint8_t* ptr)
+{
+ int32_t val = getI4LittleEndian(ptr);
+ return *(float*)&val;
+}
+
+inline double getR8LittleEndian(const uint8_t* ptr)
+{
+ int64_t val = getI8LittleEndian(ptr);
+ return *(double*)&val;
+}
+
#endif
diff --git a/src/coreclr/interpreter/intopsshared.h b/src/coreclr/interpreter/intopsshared.h
new file mode 100644
index 00000000000000..80be6d2f53d946
--- /dev/null
+++ b/src/coreclr/interpreter/intopsshared.h
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef _INTOPSSHARED_H_
+#define _INTOPSSHARED_H_
+
+#define OPDEF(a,b,c,d,e,f) a,
+typedef enum
+{
+#include "intops.def"
+ INTOP_LAST
+} InterpOpcode;
+#undef OPDEF
+
+#endif
diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 134b749e084fee..6c4a631652e838 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -755,8 +755,8 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse
{
printf("Copy ");
}
- else if ((curAssertion->op2.kind == O2K_CONST_INT) || (curAssertion->op2.kind == O2K_CONST_LONG) ||
- (curAssertion->op2.kind == O2K_CONST_DOUBLE) || (curAssertion->op2.kind == O2K_ZEROOBJ))
+ else if ((curAssertion->op2.kind == O2K_CONST_INT) || (curAssertion->op2.kind == O2K_CONST_DOUBLE) ||
+ (curAssertion->op2.kind == O2K_ZEROOBJ))
{
printf("Constant ");
}
@@ -950,10 +950,6 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse
}
break;
- case O2K_CONST_LONG:
- printf("0x%016llx", curAssertion->op2.lconVal);
- break;
-
case O2K_CONST_DOUBLE:
if (FloatingPointUtils::isNegativeZero(curAssertion->op2.dconVal))
{
@@ -1130,39 +1126,28 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAsser
AssertionDsc assertion = {OAK_INVALID};
assert(assertion.assertionKind == OAK_INVALID);
- if (op1->OperIs(GT_BOUNDS_CHECK))
+ if (op1->OperIs(GT_BOUNDS_CHECK) && (assertionKind == OAK_NO_THROW))
{
- if (assertionKind == OAK_NO_THROW)
- {
- GenTreeBoundsChk* arrBndsChk = op1->AsBoundsChk();
- assertion.assertionKind = assertionKind;
- assertion.op1.kind = O1K_ARR_BND;
- assertion.op1.bnd.vnIdx = optConservativeNormalVN(arrBndsChk->GetIndex());
- assertion.op1.bnd.vnLen = optConservativeNormalVN(arrBndsChk->GetArrayLength());
-
- if ((assertion.op1.bnd.vnIdx == ValueNumStore::NoVN) || (assertion.op1.bnd.vnLen == ValueNumStore::NoVN))
- {
- // Don't make an assertion if one of the operands has no VN
- return NO_ASSERTION_INDEX;
- }
-
- goto DONE_ASSERTION;
- }
+ GenTreeBoundsChk* arrBndsChk = op1->AsBoundsChk();
+ assertion.assertionKind = assertionKind;
+ assertion.op1.kind = O1K_ARR_BND;
+ assertion.op1.bnd.vnIdx = optConservativeNormalVN(arrBndsChk->GetIndex());
+ assertion.op1.bnd.vnLen = optConservativeNormalVN(arrBndsChk->GetArrayLength());
}
-
//
// Are we trying to make a non-null assertion?
//
- if (op2 == nullptr)
+ else if (op2 == nullptr)
{
- //
+ if (!varTypeIsGC(op1))
+ {
+ return NO_ASSERTION_INDEX; // Don't make an assertion
+ }
+
// Must be an OAK_NOT_EQUAL assertion
- //
- noway_assert(assertionKind == OAK_NOT_EQUAL);
+ assert(assertionKind == OAK_NOT_EQUAL);
- //
// Set op1 to the instance pointer of the indirection
- //
op1 = op1->gtEffectiveVal();
ssize_t offset = 0;
@@ -1184,39 +1169,18 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAsser
}
}
- if (fgIsBigOffset(offset) || op1->gtOper != GT_LCL_VAR)
- {
- goto DONE_ASSERTION; // Don't make an assertion
- }
-
- unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVar = lvaGetDesc(lclNum);
-
- ValueNum vn;
-
- // We only perform null-checks on byrefs and GC refs
- if (!varTypeIsGC(lclVar->TypeGet()))
- {
- goto DONE_ASSERTION; // Don't make an assertion
- }
-
- // If the local variable has its address exposed then bail
- if (lclVar->IsAddressExposed())
+ if (!fgIsBigOffset(offset) && op1->OperIs(GT_LCL_VAR) && !lvaVarAddrExposed(op1->AsLclVar()->GetLclNum()))
{
- goto DONE_ASSERTION; // Don't make an assertion
+ assertion.op1.kind = O1K_LCLVAR;
+ assertion.op1.lcl.lclNum = op1->AsLclVarCommon()->GetLclNum();
+ assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ assertion.op1.vn = optConservativeNormalVN(op1);
+ assertion.assertionKind = assertionKind;
+ assertion.op2.kind = O2K_CONST_INT;
+ assertion.op2.vn = ValueNumStore::VNForNull();
+ assertion.op2.u1.iconVal = 0;
+ assertion.op2.SetIconFlag(GTF_EMPTY);
}
-
- assertion.op1.kind = O1K_LCLVAR;
- assertion.op1.lcl.lclNum = lclNum;
- assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
- vn = optConservativeNormalVN(op1);
-
- assertion.op1.vn = vn;
- assertion.assertionKind = assertionKind;
- assertion.op2.kind = O2K_CONST_INT;
- assertion.op2.vn = ValueNumStore::VNForNull();
- assertion.op2.u1.iconVal = 0;
- assertion.op2.SetIconFlag(GTF_EMPTY);
}
//
// Are we making an assertion about a local variable?
@@ -1264,10 +1228,6 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAsser
}
goto CNS_COMMON;
- case GT_CNS_LNG:
- op2Kind = O2K_CONST_LONG;
- goto CNS_COMMON;
-
case GT_CNS_DBL:
op2Kind = O2K_CONST_DOUBLE;
goto CNS_COMMON;
@@ -1290,9 +1250,8 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAsser
goto DONE_ASSERTION; // Don't make an assertion
}
- assertion.op2.kind = op2Kind;
- assertion.op2.lconVal = 0;
- assertion.op2.vn = optConservativeNormalVN(op2);
+ assertion.op2.kind = op2Kind;
+ assertion.op2.vn = optConservativeNormalVN(op2);
if (op2->gtOper == GT_CNS_INT)
{
@@ -1308,10 +1267,6 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAsser
assertion.op2.u1.iconVal = iconVal;
assertion.op2.SetIconFlag(op2->GetIconHandleFlag(), op2->AsIntCon()->gtFieldSeq);
}
- else if (op2->gtOper == GT_CNS_LNG)
- {
- assertion.op2.lconVal = op2->AsLngCon()->gtLconVal;
- }
else
{
noway_assert(op2->gtOper == GT_CNS_DBL);
@@ -1431,8 +1386,7 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, optAsser
ValueNum op2VN = optConservativeNormalVN(op2);
// For TP reasons, limited to 32-bit constants on the op2 side.
- if ((op1VN != ValueNumStore::NoVN) && (op2VN != ValueNumStore::NoVN) && vnStore->IsVNInt32Constant(op2VN) &&
- !vnStore->IsVNHandle(op2VN))
+ if (vnStore->IsVNInt32Constant(op2VN) && !vnStore->IsVNHandle(op2VN))
{
assert(assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL);
assertion.assertionKind = assertionKind;
@@ -1477,12 +1431,6 @@ AssertionIndex Compiler::optFinalizeCreatingAssertion(AssertionDsc* assertion)
{
return NO_ASSERTION_INDEX;
}
-
- // TODO: only copy assertions rely on valid SSA number so we could generate more assertions here
- if (assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM)
- {
- return NO_ASSERTION_INDEX;
- }
}
// Now add the assertion to our assertion table
@@ -1764,10 +1712,6 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
switch (assertion->op1.kind)
{
- case O1K_LCLVAR:
- assert(optLocalAssertionProp ||
- lvaGetDesc(assertion->op1.lcl.lclNum)->lvPerSsaData.IsValidSsaNum(assertion->op1.lcl.ssaNum));
- break;
case O1K_ARR_BND:
// It would be good to check that bnd.vnIdx and bnd.vnLen are valid value numbers.
assert(!optLocalAssertionProp);
@@ -1805,14 +1749,6 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
}
break;
- case O2K_CONST_LONG:
- {
- // All handles should be represented by O2K_CONST_INT,
- // so no handle bits should be set here.
- assert(!assertion->op2.HasIconFlag());
- }
- break;
-
case O2K_ZEROOBJ:
{
// We only make these assertion for stores (not control flow).
@@ -1885,8 +1821,8 @@ void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, Ge
if ((candidateAssertion.op1.kind == O1K_LCLVAR) || (candidateAssertion.op1.kind == O1K_VN))
{
// "LCLVAR != CNS" is not a useful assertion (unless CNS is 0/1)
- if (((candidateAssertion.op2.kind == O2K_CONST_INT) || (candidateAssertion.op2.kind == O2K_CONST_LONG)) &&
- (candidateAssertion.op2.u1.iconVal != 0) && (candidateAssertion.op2.u1.iconVal != 1))
+ if (((candidateAssertion.op2.kind == O2K_CONST_INT)) && (candidateAssertion.op2.u1.iconVal != 0) &&
+ (candidateAssertion.op2.u1.iconVal != 1))
{
return;
}
@@ -1914,59 +1850,6 @@ void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, Ge
}
}
-// optAssertionGenCast: Create a tentative subrange assertion for a cast.
-//
-// This function will try to create an assertion that the cast's operand
-// is within the "input" range for the cast, so that this assertion can
-// later be proven via implication and the cast removed. Such assertions
-// are only generated during global propagation, and only for LCL_VARs.
-//
-// Arguments:
-// cast - the cast node for which to create the assertion
-//
-// Return Value:
-// Index of the generated assertion, or NO_ASSERTION_INDEX if it was not
-// legal, profitable, or possible to create one.
-//
-AssertionIndex Compiler::optAssertionGenCast(GenTreeCast* cast)
-{
- if (optLocalAssertionProp || !varTypeIsIntegral(cast) || !varTypeIsIntegral(cast->CastOp()))
- {
- return NO_ASSERTION_INDEX;
- }
-
- // This condition exists to preserve previous behavior.
- if (!cast->CastOp()->OperIs(GT_LCL_VAR))
- {
- return NO_ASSERTION_INDEX;
- }
-
- GenTreeLclVar* lclVar = cast->CastOp()->AsLclVar();
- LclVarDsc* varDsc = lvaGetDesc(lclVar);
-
- // It is not useful to make assertions about address-exposed variables, they will never be proven.
- if (varDsc->IsAddressExposed())
- {
- return NO_ASSERTION_INDEX;
- }
-
- // A representation-changing cast cannot be simplified if it is not checked.
- if (!cast->gtOverflow() && (genActualType(cast) != genActualType(lclVar)))
- {
- return NO_ASSERTION_INDEX;
- }
-
- AssertionDsc assertion = {OAK_SUBRANGE};
- assertion.op1.kind = O1K_LCLVAR;
- assertion.op1.vn = vnStore->VNConservativeNormalValue(lclVar->gtVNPair);
- assertion.op1.lcl.lclNum = lclVar->GetLclNum();
- assertion.op1.lcl.ssaNum = lclVar->GetSsaNum();
- assertion.op2.kind = O2K_SUBRANGE;
- assertion.op2.u2 = IntegralRange::ForCastInput(cast);
-
- return optFinalizeCreatingAssertion(&assertion);
-}
-
//------------------------------------------------------------------------
// optCreateJtrueAssertions: Create assertions about a JTRUE's relop operands.
//
@@ -2172,21 +2055,21 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree)
// See if we have IND(obj) ==/!= TypeHandle
//
- if (!optLocalAssertionProp && op1->OperIs(GT_IND))
+ if (!optLocalAssertionProp && op1->OperIs(GT_IND) && op1->gtGetOp1()->TypeIs(TYP_REF))
{
- ssize_t cnsValue = 0;
- GenTreeFlags iconFlags = GTF_EMPTY;
- if (op1->gtGetOp1()->TypeIs(TYP_REF) &&
- optIsTreeKnownIntValue(!optLocalAssertionProp, op2, &cnsValue, &iconFlags))
+ ValueNum objVN = optConservativeNormalVN(op1->gtGetOp1());
+ ValueNum typeHndVN = optConservativeNormalVN(op2);
+
+ if ((objVN != ValueNumStore::NoVN) && vnStore->IsVNTypeHandle(typeHndVN))
{
AssertionDsc assertion;
assertion.assertionKind = OAK_EQUAL;
assertion.op1.kind = O1K_EXACT_TYPE;
- assertion.op1.vn = optConservativeNormalVN(op1->gtGetOp1());
+ assertion.op1.vn = objVN;
assertion.op2.kind = O2K_CONST_INT;
- assertion.op2.u1.iconVal = cnsValue;
- assertion.op2.vn = optConservativeNormalVN(op2);
- assertion.op2.SetIconFlag(iconFlags);
+ assertion.op2.u1.iconVal = vnStore->CoercedConstantValue(typeHndVN);
+ assertion.op2.vn = typeHndVN;
+ assertion.op2.SetIconFlag(GTF_ICON_CLASS_HDL);
AssertionIndex index = optAddAssertion(&assertion);
// We don't need to create a complementary assertion here. We're only interested
@@ -2293,15 +2176,18 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree)
assert(objectNode->TypeIs(TYP_REF, TYP_I_IMPL));
assert(methodTableNode->TypeIs(TYP_I_IMPL));
- if (methodTableNode->OperIs(GT_CNS_INT))
+ ValueNum objVN = optConservativeNormalVN(objectNode);
+ ValueNum typeHndVN = optConservativeNormalVN(methodTableNode);
+
+ if ((objVN != ValueNumStore::NoVN) && vnStore->IsVNTypeHandle(typeHndVN))
{
AssertionDsc assertion;
assertion.op1.kind = O1K_SUBTYPE;
- assertion.op1.vn = optConservativeNormalVN(objectNode);
+ assertion.op1.vn = objVN;
assertion.op2.kind = O2K_CONST_INT;
- assertion.op2.u1.iconVal = methodTableNode->AsIntCon()->IconValue();
- assertion.op2.vn = optConservativeNormalVN(methodTableNode);
- assertion.op2.SetIconFlag(op2->GetIconHandleFlag());
+ assertion.op2.u1.iconVal = vnStore->CoercedConstantValue(typeHndVN);
+ assertion.op2.vn = typeHndVN;
+ assertion.op2.SetIconFlag(GTF_ICON_CLASS_HDL);
assertion.assertionKind = OAK_EQUAL;
AssertionIndex index = optAddAssertion(&assertion);
@@ -2343,9 +2229,6 @@ void Compiler::optAssertionGen(GenTree* tree)
optAssertionPropCurrentTree = tree;
#endif
- // For most of the assertions that we create below
- // the assertion is true after the tree is processed
- bool assertionProven = true;
AssertionInfo assertionInfo;
switch (tree->OperGet())
{
@@ -2370,7 +2253,11 @@ void Compiler::optAssertionGen(GenTree* tree)
case GT_ARR_LENGTH:
case GT_MDARR_LENGTH:
case GT_MDARR_LOWER_BOUND:
- assertionInfo = optCreateAssertion(tree->GetIndirOrArrMetaDataAddr(), nullptr, OAK_NOT_EQUAL);
+ // These indirs (esp. GT_IND and GT_STOREIND) are the most popular sources of assertions.
+ if (tree->IndirMayFault(this))
+ {
+ assertionInfo = optCreateAssertion(tree->GetIndirOrArrMetaDataAddr(), nullptr, OAK_NOT_EQUAL);
+ }
break;
case GT_INTRINSIC:
@@ -2408,14 +2295,6 @@ void Compiler::optAssertionGen(GenTree* tree)
}
break;
- case GT_CAST:
- // This represets an assertion that we would like to prove to be true.
- // If we can prove this assertion true then we can eliminate this cast.
- // We only create this assertion for global assertion propagation.
- assertionInfo = optAssertionGenCast(tree->AsCast());
- assertionProven = false;
- break;
-
case GT_JTRUE:
assertionInfo = optAssertionGenJtrue(tree);
break;
@@ -2425,7 +2304,7 @@ void Compiler::optAssertionGen(GenTree* tree)
break;
}
- if (assertionInfo.HasAssertion() && assertionProven)
+ if (assertionInfo.HasAssertion())
{
tree->SetAssertionInfo(assertionInfo);
}
@@ -3355,17 +3234,6 @@ GenTree* Compiler::optConstantAssertionProp(AssertionDsc* curAssertion,
newTree->BashToConst(curAssertion->op2.dconVal, tree->TypeGet());
break;
- case O2K_CONST_LONG:
- if (newTree->TypeIs(TYP_LONG))
- {
- newTree->BashToConst(curAssertion->op2.lconVal);
- }
- else
- {
- newTree->BashToConst(static_cast(curAssertion->op2.lconVal));
- }
- break;
-
case O2K_CONST_INT:
// Don't propagate handles if we need to report relocs.
@@ -3760,30 +3628,26 @@ GenTree* Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, GenTreeL
continue;
}
- // Constant prop.
- //
- // The case where the tree type could be different than the LclVar type is caused by
- // gtFoldExpr, specifically the case of a cast, where the fold operation changes the type of the LclVar
- // node. In such a case is not safe to perform the substitution since later on the JIT will assert mismatching
- // types between trees.
- //
- if (curAssertion->op1.lcl.lclNum == lclNum)
+ // Verify types match
+ if (tree->TypeGet() != lvaGetRealType(lclNum))
{
- LclVarDsc* const lclDsc = lvaGetDesc(lclNum);
- // Verify types match
- if (tree->TypeGet() == lclDsc->lvType)
- {
- // If local assertion prop, just perform constant prop.
- if (optLocalAssertionProp)
- {
- return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex));
- }
+ continue;
+ }
- // If global assertion, perform constant propagation only if the VN's match.
- if (curAssertion->op1.vn == vnStore->VNConservativeNormalValue(tree->gtVNPair))
- {
- return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex));
- }
+ if (optLocalAssertionProp)
+ {
+ // Check lclNum in Local Assertion Prop
+ if (curAssertion->op1.lcl.lclNum == lclNum)
+ {
+ return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex));
+ }
+ }
+ else
+ {
+ // Check VN in Global Assertion Prop
+ if (curAssertion->op1.vn == vnStore->VNConservativeNormalValue(tree->gtVNPair))
+ {
+ return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG(assertionIndex));
}
}
}
@@ -6063,10 +5927,6 @@ void Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionD
usable = op1MatchesCopy && impAssertion->op2.u2.Contains(depAssertion->op2.u2);
break;
- case O2K_CONST_LONG:
- usable = op1MatchesCopy && (impAssertion->op2.lconVal == depAssertion->op2.lconVal);
- break;
-
case O2K_CONST_DOUBLE:
// Exact memory match because of positive and negative zero
usable = op1MatchesCopy &&
diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index 49b7ebc20576b8..4fbf60d14169b4 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -574,10 +574,6 @@ enum class BasicBlockVisit
// The bbPreds list is initially created by Compiler::fgLinkBasicBlocks()
// and is incrementally kept up to date.
//
-// The edge weight are computed by Compiler::fgComputeEdgeWeights()
-// the edge weights are used to straighten conditional branches
-// by Compiler::fgReorderBlocks()
-//
struct FlowEdge
{
private:
diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis
index 54661833ef8552..58fadf2251da47 100644
--- a/src/coreclr/jit/clrjit.natvis
+++ b/src/coreclr/jit/clrjit.natvis
@@ -266,6 +266,7 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/vis
(LcJaggedArrayOptInfo*)this,nd
(LcMdArrayOptInfo*)this,nd
+ (LcSpanOptInfo*)this,nd
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index b26c93534b2f9d..fabf3ec922bf42 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -425,10 +425,6 @@ class CodeGen final : public CodeGenInterface
void genOSRSaveRemainingCalleeSavedRegisters();
#endif // TARGET_AMD64
-#if defined(TARGET_RISCV64)
- void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize);
-#endif
-
void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
void genPoisonFrame(regMaskTP bbRegLiveIn);
@@ -1059,6 +1055,8 @@ class CodeGen final : public CodeGenInterface
template
void genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic,
+ instruction ins,
+ emitAttr attr,
regNumber nonConstImmReg,
regNumber baseReg,
regNumber offsReg,
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index f51023c4305221..01843d0c8b6128 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -2484,8 +2484,11 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
}
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
- // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need
- // the upper 32bits be sign-extended to 64 bits.
+ // TODO-LOONGARCH64:
+ // TODO-RISCV64:
+ // LoongArch64 and RiscV64 ABIs require 32-bit values to be sign-extended to 64-bits.
+ // We apply the sign-extension unconditionally here to avoid corner case bugs, even
+ // though it may not be strictly necessary in all cases.
m_extendKind = SIGN_EXTEND_INT;
#else
m_extendKind = COPY;
diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp
index de929110cf7eb1..f7840c2f144a9c 100644
--- a/src/coreclr/jit/codegenloongarch64.cpp
+++ b/src/coreclr/jit/codegenloongarch64.cpp
@@ -1924,7 +1924,40 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
//
void CodeGen::genCodeForBswap(GenTree* tree)
{
- NYI_LOONGARCH64("genCodeForBswap unimpleement yet");
+ assert(tree->OperIs(GT_BSWAP, GT_BSWAP16));
+
+ emitAttr attr = emitActualTypeSize(tree);
+ regNumber targetReg = tree->GetRegNum();
+ emitter* emit = GetEmitter();
+
+ GenTree* operand = tree->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+ instruction ins;
+
+ if (tree->OperIs(GT_BSWAP16))
+ {
+ ins = INS_revb_4h;
+ }
+ else if (attr == EA_8BYTE)
+ {
+ ins = INS_revb_d;
+ }
+ else
+ {
+ assert(attr == EA_4BYTE);
+ ins = INS_revb_2w;
+ }
+
+ emit->emitIns_R_R(ins, attr, targetReg, operandReg);
+
+ if (tree->OperIs(GT_BSWAP16) && !genCanOmitNormalizationForBswap16(tree))
+ {
+ emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, targetReg, targetReg, 15, 0);
+ }
+
+ genProduceReg(tree);
}
//------------------------------------------------------------------------
diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp
index b57ffe66b6af5b..7e279ac369dc9d 100644
--- a/src/coreclr/jit/codegenriscv64.cpp
+++ b/src/coreclr/jit/codegenriscv64.cpp
@@ -1627,7 +1627,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// The SP might already be in the guard page, so we must touch it BEFORE
// the alloc, not after.
- // ld_w r0, 0(SP)
+ // tickle the page - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0);
lastTouchDelta = amount;
@@ -1672,8 +1672,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// and localloc size is a multiple of STACK_ALIGN.
// Loop:
- ssize_t imm = -16;
- emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);
+ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16);
emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8);
emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0);
@@ -1685,8 +1684,8 @@ void CodeGen::genLclHeap(GenTree* tree)
emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16);
- assert(imm == (-4 << 2)); // goto loop.
- emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2));
+ // goto Loop
+ emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2);
lastTouchDelta = 0;
}
@@ -1700,7 +1699,6 @@ void CodeGen::genLclHeap(GenTree* tree)
// case SP is on the last byte of the guard page. Thus you must
// touch SP-0 first not SP-0x1000.
//
- // This is similar to the prolog code in CodeGen::genAllocLclFrame().
//
// Note that we go through a few hoops so that SP never points to
// illegal pages at any time during the tickling process.
@@ -1711,23 +1709,20 @@ void CodeGen::genLclHeap(GenTree* tree)
// addi regCnt, REG_R0, 0
//
// Skip:
- // lui regTmp, eeGetPageSize()>>12
+ // lui regPageSize, eeGetPageSize()>>12
+ // addi regTmp, SP, 0
// Loop:
- // lw r0, 0(SP) // tickle the page - read from the page
- // sub RA, SP, regTmp // decrement SP by eeGetPageSize()
- // bltu RA, regCnt, Done
- // sub SP, SP,regTmp
- // j Loop
+ // lw r0, 0(regTmp) // tickle the page - read from the page
+ // sub regTmp, regTmp, regPageSize
+ // bgeu regTmp, regCnt, Loop
//
// Done:
- // mov SP, regCnt
+ // addi SP, regCnt, 0
//
if (tempReg == REG_NA)
tempReg = internalRegisters.Extract(tree);
- regNumber rPageSize = internalRegisters.GetSingle(tree);
-
assert(regCnt != tempReg);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);
@@ -1738,35 +1733,24 @@ void CodeGen::genLclHeap(GenTree* tree)
emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0);
- emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
-
- // genDefineTempLabel(loop);
-
- // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
- emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0);
-
- // decrement SP by eeGetPageSize()
- emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize);
-
- assert(rPageSize != tempReg);
-
- ssize_t imm = 3 << 2; // goto done.
- emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm);
+ regNumber rPageSize = internalRegisters.GetSingle(tree);
- emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize);
+ noway_assert(rPageSize != tempReg);
- imm = -4 << 2;
- // Jump to loop and tickle new stack address
- emit->emitIns_I(INS_j, EA_PTRSIZE, imm);
+ emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
+ regSet.verifyRegUsed(rPageSize);
+ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0);
- // Done with stack tickle loop
- // genDefineTempLabel(done);
+ // tickle the page - this triggers a page fault when on the guard page
+ emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0);
+ emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize);
- // Now just move the final value to SP
- emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0);
+ emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2);
// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
+ // Move the final value to SP
+ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, regCnt, 0);
}
ALLOC_DONE:
@@ -6672,175 +6656,6 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
};
}
-//------------------------------------------------------------------------
-// genStackProbe: Probe the stack without changing it
-//
-// Notes:
-// This function is using loop to probe each memory page.
-//
-// Arguments:
-// frameSize - total frame size
-// rOffset - usually initial register number
-// rLimit - an extra register for comparison
-// rPageSize - register for storing page size
-//
-void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize)
-{
- // make sure frameSize safely fits within 4 bytes
- noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize);
-
- const target_size_t pageSize = compiler->eeGetPageSize();
-
- // According to RISC-V Privileged ISA page size should be equal 4KiB
- noway_assert(pageSize == 0x1000);
-
- emitter* emit = GetEmitter();
-
- emit->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize);
- regSet.verifyRegUsed(rLimit);
-
- emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE);
-
- emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
- regSet.verifyRegUsed(rPageSize);
-
- emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize);
-
- // Loop:
- // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
- emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0);
- emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize);
-
- // each instr is 4 bytes
- // if (rOffset >= rLimit) goto Loop;
- emit->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, -2 << 2);
-}
-
-//------------------------------------------------------------------------
-// genAllocLclFrame: Probe the stack.
-//
-// Notes:
-// This only does the probing; allocating the frame is done when callee-saved registers are saved.
-// This is done before anything has been pushed. The previous frame might have a large outgoing argument
-// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
-// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
-// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
-// page by default, so we need to be more careful. We do an extra probe if we might not have probed
-// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
-// on Windows as well just to be consistent, even though it should not be necessary.
-//
-// Arguments:
-// frameSize - the size of the stack frame being allocated.
-// initReg - register to use as a scratch register.
-// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
-// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged.
-// maskArgRegsLiveIn - incoming argument registers that are currently live.
-//
-// Return value:
-// None
-//
-void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
-{
- assert(compiler->compGeneratingProlog);
-
- if (frameSize == 0)
- {
- return;
- }
-
- // According to RISC-V Privileged ISA page size should be equal 4KiB
- const target_size_t pageSize = compiler->eeGetPageSize();
-
- assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
-
- target_size_t lastTouchDelta = 0;
-
- emitter* emit = GetEmitter();
-
- // Emit the following sequence to 'tickle' the pages.
- // Note it is important that stack pointer not change until this is complete since the tickles
- // could cause a stack overflow, and we need to be able to crawl the stack afterward
- // (which means the stack pointer needs to be known).
-
- if (frameSize < pageSize)
- {
- // no probe needed
- lastTouchDelta = frameSize;
- }
- else if (frameSize < 3 * pageSize)
- {
- // between 1 and 3 pages we will probe each page without a loop,
- // because it is faster that way and doesn't cost us much
- lastTouchDelta = frameSize;
-
- for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
- {
- emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12);
- regSet.verifyRegUsed(initReg);
-
- emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
- emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);
-
- lastTouchDelta -= pageSize;
- }
-
- assert(pInitRegZeroed != nullptr);
- *pInitRegZeroed = false; // The initReg does not contain zero
-
- assert(lastTouchDelta == frameSize % pageSize);
- compiler->unwindPadding();
- }
- else
- {
- // probe each page, that we need to allocate large stack frame
- assert(frameSize >= 3 * pageSize);
-
- regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
- availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers
- // as they are currently live
- availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
-
- noway_assert(availMask != RBM_NONE);
-
- regMaskTP regMask = genFindLowestBit(availMask);
- regNumber rLimit = genRegNumFromMask(regMask);
-
- availMask &= ~regMask; // Remove rLimit register
-
- noway_assert(availMask != RBM_NONE);
-
- regMask = genFindLowestBit(availMask);
- regNumber rPageSize = genRegNumFromMask(regMask);
-
- genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize);
-
- assert(pInitRegZeroed != nullptr);
- *pInitRegZeroed = false; // The initReg does not contain zero
-
- lastTouchDelta = frameSize % pageSize;
- compiler->unwindPadding();
- }
-
-#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0
- // if the last page was too far, we will make an extra probe at the bottom
- if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
- {
- assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1);
-
- emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, initReg, REG_R0, frameSize);
- regSet.verifyRegUsed(initReg);
-
- emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
- emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);
-
- assert(pInitRegZeroed != nullptr);
- *pInitRegZeroed = false; // The initReg does not contain zero
-
- compiler->unwindPadding();
- }
-#endif
-}
-
void CodeGen::genJumpToThrowHlpBlk_la(
SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2)
{
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 4b14b8d1c451dd..0f41ad622f8e82 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -2200,13 +2200,22 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
#if defined(FEATURE_EH_WINDOWS_X86)
case GT_END_LFIN:
+ {
+ // Find the eh table entry via the eh ID
+ //
+ unsigned const ehID = (unsigned)treeNode->AsVal()->gtVal1;
+ assert(ehID < compiler->compEHID);
+ assert(compiler->m_EHIDtoEHblkDsc != nullptr);
+
+ EHblkDsc* HBtab = nullptr;
+ bool found = compiler->m_EHIDtoEHblkDsc->Lookup(ehID, &HBtab);
+ assert(found);
+ assert(HBtab != nullptr);
// Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
// mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
-
- size_t finallyNesting;
- finallyNesting = treeNode->AsVal()->gtVal1;
- noway_assert(treeNode->AsVal()->gtVal1 < compiler->compHndBBtabCount);
+ //
+ const size_t finallyNesting = HBtab->ebdHandlerNestingLevel;
noway_assert(finallyNesting < compiler->compHndBBtabCount);
// The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
@@ -2220,6 +2229,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, (unsigned)curNestingSlotOffs,
0);
break;
+ }
#endif // FEATURE_EH_WINDOWS_X86
case GT_PINVOKE_PROLOG:
@@ -4798,17 +4808,16 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
// tree - the bit shift node (that specifies the type of bit shift to perform).
//
// Assumptions:
-// a) All GenTrees are register allocated.
-// b) The shift-by-amount in tree->AsOp()->gtOp2 is either a contained constant or
-// it's a register-allocated expression. If it is in a register that is
-// not RCX, it will be moved to RCX (so RCX better not be in use!).
+// The shift-by-amount in tree->AsOp()->gtOp2 is either a contained constant or it's a
+// register-allocated expression. If not using BMI2 instructions and op2 is in a register
+// that is not RCX, it will be moved to RCX (so RCX better not be in use!).
//
void CodeGen::genCodeForShift(GenTree* tree)
{
// Only the non-RMW case here.
assert(tree->OperIsShiftOrRotate());
- assert(tree->AsOp()->gtOp1->isUsedFromReg());
assert(tree->GetRegNum() != REG_NA);
+ assert(tree->AsOp()->gtOp1->isUsedFromReg() || compiler->compIsaSupportedDebugOnly(InstructionSet_BMI2));
genConsumeOperands(tree->AsOp());
@@ -4819,12 +4828,13 @@ void CodeGen::genCodeForShift(GenTree* tree)
regNumber operandReg = operand->GetRegNum();
GenTree* shiftBy = tree->gtGetOp2();
+ emitAttr size = emitTypeSize(tree);
if (shiftBy->isContainedIntOrIImmed())
{
- emitAttr size = emitTypeSize(tree);
+ assert(tree->OperIsRotate() || (operandReg != REG_NA));
- bool mightOptimizeLsh = tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags();
+ bool mightOptimizeLsh = tree->OperIs(GT_LSH) && !tree->gtSetFlags();
// Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg"
if (mightOptimizeLsh && shiftBy->IsIntegralConst(1))
@@ -4838,14 +4848,14 @@ void CodeGen::genCodeForShift(GenTree* tree)
GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0);
}
}
- // Optimize "X<<2" to "lea [reg*4]" - we only do this when the dst and src registers are different since it will
- // remove a 'mov'.
+ // Optimize "X<<2" to "lea [reg*4]"
+ // We only do this when the dst and src registers are different since it will remove a 'mov'.
else if (mightOptimizeLsh && shiftBy->IsIntegralConst(2) && tree->GetRegNum() != operandReg)
{
GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), REG_NA, operandReg, 4, 0);
}
- // Optimize "X<<3" to "lea [reg*8]" - we only do this when the dst and src registers are different since it will
- // remove a 'mov'.
+ // Optimize "X<<3" to "lea [reg*8]"
+ // We only do this when the dst and src registers are different since it will remove a 'mov'.
else if (mightOptimizeLsh && shiftBy->IsIntegralConst(3) && tree->GetRegNum() != operandReg)
{
GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), REG_NA, operandReg, 8, 0);
@@ -4854,53 +4864,54 @@ void CodeGen::genCodeForShift(GenTree* tree)
{
int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
-#if defined(TARGET_64BIT)
- // Try to emit rorx if BMI2 is available instead of mov+rol
- // it makes sense only for 64bit integers
- if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) &&
- compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) &&
- (shiftByValue > 0) && (shiftByValue < 64))
+ if (tree->OperIsRotate() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2))
{
- const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue;
- GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value);
- genProduceReg(tree);
- return;
+ // If we have a contained source operand, we must emit rorx.
+ // We may also use rorx for 64bit values when a mov would otherwise be required,
+ // because rorx is smaller than mov+rol/ror when REX prefix is included.
+
+ if ((operandReg == REG_NA) || ((varTypeIsLong(targetType) && (tree->GetRegNum() != operandReg))))
+ {
+ // There is no 'rolx', so for rol, we use rorx with the shift value adjusted.
+ if (tree->OperIs(GT_ROL))
+ {
+ shiftByValue &= (size * BITS_PER_BYTE - 1);
+ shiftByValue = (size * BITS_PER_BYTE - shiftByValue);
+ }
+
+ inst_RV_TT_IV(INS_rorx, size, tree->GetRegNum(), operand, shiftByValue, INS_OPTS_NONE);
+ genProduceReg(tree);
+ return;
+ }
}
-#endif
+
ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
GetEmitter()->emitIns_BASE_R_R_I(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg, shiftByValue);
genProduceReg(tree);
return;
}
}
-#if defined(TARGET_64BIT)
else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2))
{
- // Try to emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr.
+ // Emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr.
switch (tree->OperGet())
{
case GT_LSH:
ins = INS_shlx;
break;
-
case GT_RSH:
ins = INS_sarx;
break;
-
case GT_RSZ:
ins = INS_shrx;
break;
-
default:
unreached();
}
- regNumber shiftByReg = shiftBy->GetRegNum();
- emitAttr size = emitTypeSize(tree);
- // The order of operandReg and shiftByReg are swapped to follow shlx, sarx and shrx encoding spec.
- GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), shiftByReg, operandReg);
+ // The order of operand and shiftBy are swapped to follow shlx, sarx and shrx encoding spec.
+ inst_RV_RV_TT(ins, size, tree->GetRegNum(), shiftBy->GetRegNum(), operand, /*isRMW*/ false, INS_OPTS_NONE);
}
-#endif
else
{
// We must have the number of bits to shift stored in ECX, since we constrained this node to
@@ -5674,6 +5685,13 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
// These intrinsics are "ins reg/mem, xmm"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
attr = emitActualTypeSize(baseType);
+#if defined(TARGET_X86)
+ if (varTypeIsLong(baseType))
+ {
+ ins = INS_movq;
+ attr = EA_8BYTE;
+ }
+#endif // TARGET_X86
break;
}
@@ -9126,10 +9144,6 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
theEmitter->emitIns_R_R_I(INS_shld, EA_4BYTE, REG_EAX, REG_ECX, 5);
theEmitter->emitIns_R_R_I(INS_shrd, EA_2BYTE, REG_EAX, REG_ECX, 5);
- // TODO-XArch-apx: S_R_I path only accepts SEE or VEX instructions,
- // so I assuem shld/shrd will not be taking the first argument from stack.
- // theEmitter->emitIns_S_R_I(INS_shld, EA_2BYTE, 1, 2, REG_EAX, 5);
- // theEmitter->emitIns_S_R_I(INS_shrd, EA_2BYTE, 1, 2, REG_EAX, 5);
theEmitter->emitIns_AR_R(INS_cmpxchg, EA_2BYTE, REG_EAX, REG_EDX, 2);
@@ -9299,6 +9313,51 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
theEmitter->emitIns_BASE_R_R(INS_inc, EA_4BYTE, REG_R11, REG_R12);
theEmitter->emitIns_BASE_R_R_I(INS_add, EA_4BYTE, REG_R11, REG_R12, 5);
+
+ // testing for EGPR encodings.
+ GenTreePhysReg eGPR(REG_R16);
+ eGPR.SetRegNum(REG_R16);
+ GenTreeIndir loadGPR = indirForm(TYP_SIMD32, &eGPR);
+
+    // SIMD instructions
+    // In most cases, EGPR will only be used as BASE/INDEX registers in SIMD instructions.
+ theEmitter->emitIns_R_R_A(INS_addps, EA_32BYTE, REG_XMM16, REG_XMM16, &loadGPR);
+
+    // Legacy instructions
+ theEmitter->emitIns_R_ARX(INS_add, EA_4BYTE, REG_R16, REG_R17, REG_R18, 1, 0);
+
+ theEmitter->emitIns_AR_R(INS_movnti, EA_8BYTE, REG_R17, REG_R16, 10);
+ theEmitter->emitIns_R_R_R(INS_andn, EA_8BYTE, REG_R17, REG_R16, REG_R18);
+
+ theEmitter->emitIns_Mov(INS_kmovb_gpr, EA_4BYTE, REG_R16, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovb_msk, EA_4BYTE, REG_K5, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovw_gpr, EA_4BYTE, REG_R16, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovw_msk, EA_4BYTE, REG_K5, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovd_gpr, EA_4BYTE, REG_R16, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovd_msk, EA_4BYTE, REG_K5, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovq_gpr, EA_8BYTE, REG_R16, REG_K0, false);
+ theEmitter->emitIns_Mov(INS_kmovq_msk, EA_8BYTE, REG_K5, REG_K0, false);
+
+ theEmitter->emitIns_R_R(INS_crc32_apx, EA_1BYTE, REG_R16, REG_R17);
+ theEmitter->emitIns_R_R(INS_crc32_apx, EA_2BYTE, REG_R16, REG_R17);
+ theEmitter->emitIns_R_R(INS_crc32_apx, EA_8BYTE, REG_R16, REG_R17);
+ theEmitter->emitIns_R_A(INS_crc32_apx, EA_8BYTE, REG_R18, &loadGPR);
+ theEmitter->emitIns_R_S(INS_crc32_apx, EA_8BYTE, REG_R18, 0, 0);
+
+ // Note that BZHI has a reversed src operands due to special handling at import.
+ theEmitter->emitIns_R_R_R(INS_bzhi, EA_4BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_bzhi, EA_8BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_mulx, EA_4BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_mulx, EA_8BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_pdep, EA_4BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_pdep, EA_8BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
+ theEmitter->emitIns_R_R_R(INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
+
+ theEmitter->emitIns_Mov(INS_movd, EA_4BYTE, REG_R16, REG_XMM0, false);
+ theEmitter->emitIns_Mov(INS_movd, EA_4BYTE, REG_R16, REG_XMM16, false);
+ theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false);
+ theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM16, false);
}
void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index b0797e1b27334f..adf582247da3e2 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -1984,9 +1984,34 @@ void Compiler::compSetProcessor()
// don't actually exist. The JIT is in charge of adding those and ensuring
// the total sum of flags is still valid.
#if defined(TARGET_XARCH)
- // Get the preferred vector bitwidth, rounding down to the nearest multiple of 128-bits
- uint32_t preferredVectorBitWidth = (ReinterpretHexAsDecimal(JitConfig.PreferredVectorBitWidth()) / 128) * 128;
- uint32_t preferredVectorByteLength = preferredVectorBitWidth / 8;
+ // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth.
+ // No check is done for the validity of the value, since it will be clamped to max supported by
+ // hardware and config when queried. We will, therefore, remove the marker ISA and allow it to
+ // be re-added if appropriate based on the hardware ISA evaluations below.
+
+ uint32_t preferredVectorBitWidth = 0;
+ if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector128))
+ {
+ instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector128);
+ preferredVectorBitWidth = 128;
+ }
+ else if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector256))
+ {
+ instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector256);
+ preferredVectorBitWidth = 256;
+ }
+ else if (instructionSetFlags.HasInstructionSet(InstructionSet_Vector512))
+ {
+ instructionSetFlags.RemoveInstructionSet(InstructionSet_Vector512);
+ preferredVectorBitWidth = 512;
+ }
+
+ opts.preferredVectorByteLength = preferredVectorBitWidth / BITS_PER_BYTE;
+
+ // Only one marker ISA should have been passed in, and it should now be cleared.
+ assert(!instructionSetFlags.HasInstructionSet(InstructionSet_Vector128) &&
+ !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) &&
+ !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512));
if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE))
{
@@ -2018,20 +2043,6 @@ void Compiler::compSetProcessor()
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
-
- if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING))
- {
- // Some architectures can experience frequency throttling when
- // executing 512-bit width instructions. To account for this we set the
- // default preferred vector width to 256-bits in some scenarios. Power
- // users can override this with `DOTNET_PreferredVectorBitWidth=512` to
- // allow using such instructions where hardware support is available.
- //
- // Do not condition this based on stress mode as it makes the support
- // reported inconsistent across methods and breaks expectations/functionality
-
- preferredVectorByteLength = 256 / 8;
- }
}
else
{
@@ -2039,8 +2050,6 @@ void Compiler::compSetProcessor()
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX10v1));
}
}
-
- opts.preferredVectorByteLength = preferredVectorByteLength;
#elif defined(TARGET_ARM64)
if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd))
{
@@ -2574,6 +2583,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
#endif // DEBUG
}
+ bool enableInliningMethodsWithEH = JitConfig.JitInlineMethodsWithEH() > 0;
+
+#ifdef DEBUG
+ static ConfigMethodRange JitInlineMethodsWithEHRange;
+ JitInlineMethodsWithEHRange.EnsureInit(JitConfig.JitInlineMethodsWithEHRange());
+ const unsigned hash = impInlineRoot()->info.compMethodHash();
+ const bool inRange = JitInlineMethodsWithEHRange.Contains(hash);
+ enableInliningMethodsWithEH &= inRange;
+#endif
+
+ opts.compInlineMethodsWithEH = enableInliningMethodsWithEH;
+
if (compIsForInlining())
{
return;
@@ -5021,17 +5042,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// We won't introduce new blocks from here on out,
// so run the new block layout.
//
- if (JitConfig.JitDoReversePostOrderLayout())
- {
- DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout);
- }
- else
- {
- // If we didn't run 3-opt, we might still have a profile-aware DFS tree computed during LSRA available.
- // This tree's presence can trigger asserts if pre/postorder numbers are recomputed,
- // so invalidate the tree either way.
- fgInvalidateDfsTree();
- }
+ DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout);
// Now that the flowgraph is finalized, run post-layout optimizations.
//
@@ -5207,6 +5218,21 @@ void Compiler::FinalizeEH()
lvaSetVarAddrExposed(lvaShadowSPslotsVar DEBUGARG(AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY));
}
+ // Build up a mapping from EH IDs to EHblkDsc*
+ //
+ assert(m_EHIDtoEHblkDsc == nullptr);
+
+ if (compHndBBtabCount > 0)
+ {
+ m_EHIDtoEHblkDsc = new (getAllocator()) EHIDtoEHblkDscMap(getAllocator());
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+ m_EHIDtoEHblkDsc->Set(HBtab->ebdID, HBtab);
+ }
+ }
+
#endif // FEATURE_EH_WINDOWS_X86
// We should not make any more alterations to the EH table structure.
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 9d265af58c019f..1be19a765767c7 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -506,19 +506,6 @@ enum class AddressExposedReason
class LclVarDsc
{
public:
- // The constructor. Most things can just be zero'ed.
- //
- // Initialize the ArgRegs to REG_STK.
- LclVarDsc()
- : _lvArgReg(REG_STK)
-#if FEATURE_MULTIREG_ARGS
- , _lvOtherArgReg(REG_STK)
-#endif // FEATURE_MULTIREG_ARGS
- , lvClassHnd(NO_CLASS_HANDLE)
- , lvPerSsaData()
- {
- }
-
// note this only packs because var_types is a typedef of unsigned char
var_types lvType : 5; // TYP_INT/LONG/FLOAT/DOUBLE/REF
@@ -778,13 +765,6 @@ class LclVarDsc
regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
#endif // !defined(TARGET_64BIT)
- regNumberSmall _lvArgReg; // The (first) register in which this argument is passed.
-
-#if FEATURE_MULTIREG_ARGS
- regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
- // Note this is defined but not used by ARM32
-#endif // FEATURE_MULTIREG_ARGS
-
regNumberSmall _lvArgInitReg; // the register into which the argument is moved at entry
public:
@@ -997,7 +977,7 @@ class LclVarDsc
unsigned lvSlotNum; // original slot # (if remapped)
// class handle for the local or null if not known or not a class
- CORINFO_CLASS_HANDLE lvClassHnd;
+ CORINFO_CLASS_HANDLE lvClassHnd = NO_CLASS_HANDLE;
private:
ClassLayout* m_layout; // layout info for structs
@@ -2722,8 +2702,12 @@ class Compiler
// etc.
unsigned ehMaxHndNestingCount = 0;
+    typedef JitHashTable<unsigned short, JitSmallPrimitiveKeyFuncs<unsigned short>, EHblkDsc*> EHIDtoEHblkDscMap;
+ EHIDtoEHblkDscMap* m_EHIDtoEHblkDsc = nullptr;
+
#endif // FEATURE_EH_WINDOWS_X86
+ EHblkDsc* ehFindEHblkDscById(unsigned short ehID);
bool ehTableFinalized = false;
void FinalizeEH();
@@ -3356,11 +3340,19 @@ class Compiler
GenTree* gtNewSimdRoundNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
+ GenTree* gtNewSimdShuffleVariableNode(var_types type,
+ GenTree* op1,
+ GenTree* op2,
+ CorInfoType simdBaseJitType,
+ unsigned simdSize,
+ bool isShuffleNative);
+
GenTree* gtNewSimdShuffleNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
- unsigned simdSize);
+ unsigned simdSize,
+ bool isShuffleNative);
GenTree* gtNewSimdSqrtNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -3963,7 +3955,7 @@ class Compiler
unsigned lvaInlineeReturnSpillTemp = BAD_VAR_NUM; // The temp to spill the non-VOID return expression
// in case there are multiple BBJ_RETURN blocks in the inlinee
// or if the inlinee has GC ref locals.
-
+
bool lvaInlineeReturnSpillTempFreshlyCreated = false; // True if the temp was freshly created for the inlinee return
#if FEATURE_FIXED_OUT_ARGS
@@ -4491,7 +4483,7 @@ class Compiler
CompAllocator alloc(compiler->getAllocator(CMK_Generic));
compiler->impEnumeratorGdvLocalMap = new (alloc) NodeToUnsignedMap(alloc);
}
-
+
return compiler->impEnumeratorGdvLocalMap;
}
@@ -4685,7 +4677,11 @@ class Compiler
bool mustExpand);
#ifdef FEATURE_HW_INTRINSICS
- bool IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const;
+ bool IsValidForShuffle(GenTree* indices,
+ unsigned simdSize,
+ var_types simdBaseType,
+ bool* canBecomeValid,
+ bool isShuffleNative) const;
GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
@@ -5505,6 +5501,7 @@ class Compiler
void fgExpandQmarkNodes();
bool fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast);
+ bool fgSimpleLowerBswap16(LIR::Range& range, GenTree* op);
#if FEATURE_LOOP_ALIGN
bool shouldAlignLoop(FlowGraphNaturalLoop* loop, BasicBlock* top);
@@ -6188,7 +6185,6 @@ class Compiler
PhaseStatus fgComputeBlockWeights();
bool fgComputeMissingBlockWeights();
- bool fgReorderBlocks(bool useProfile);
PhaseStatus fgSearchImprovedLayout();
template
@@ -6670,7 +6666,7 @@ class Compiler
GenTree* fgMorphCopyBlock(GenTree* tree);
private:
GenTree* fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone = nullptr);
- void fgTryReplaceStructLocalWithFields(GenTree** use);
+ bool fgTryReplaceStructLocalWithFields(GenTree** use);
GenTree* fgMorphFinalizeIndir(GenTreeIndir* indir);
GenTree* fgOptimizeCast(GenTreeCast* cast);
GenTree* fgOptimizeCastOnStore(GenTree* store);
@@ -7741,7 +7737,6 @@ class Compiler
O2K_INVALID,
O2K_LCLVAR_COPY,
O2K_CONST_INT,
- O2K_CONST_LONG,
O2K_CONST_DOUBLE,
O2K_ZEROOBJ,
O2K_SUBRANGE,
@@ -7780,17 +7775,13 @@ class Compiler
ValueNum vn;
struct IntVal
{
- ssize_t iconVal; // integer
-#if !defined(HOST_64BIT)
- unsigned padding; // unused; ensures iconFlags does not overlap lconVal
-#endif
+ ssize_t iconVal; // integer
FieldSeq* fieldSeq;
};
union
{
SsaVar lcl;
IntVal u1;
- int64_t lconVal;
double dconVal;
IntegralRange u2;
};
@@ -7932,9 +7923,6 @@ class Compiler
case O2K_CONST_INT:
return ((op2.u1.iconVal == that->op2.u1.iconVal) && (op2.GetIconFlag() == that->op2.GetIconFlag()));
- case O2K_CONST_LONG:
- return (op2.lconVal == that->op2.lconVal);
-
case O2K_CONST_DOUBLE:
// exact match because of positive and negative zero.
return (memcmp(&op2.dconVal, &that->op2.dconVal, sizeof(double)) == 0);
@@ -8182,6 +8170,7 @@ class Compiler
bool optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum);
bool optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal);
+ bool optExtractSpanIndex(GenTree* tree, SpanIndex* result);
bool optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal);
bool optReconstructArrIndex(GenTree* tree, ArrIndex* result);
bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
@@ -10474,6 +10463,9 @@ class Compiler
// Collect 64 bit counts for PGO data.
bool compCollect64BitCounts;
+ // Allow inlining of methods with EH.
+ bool compInlineMethodsWithEH;
+
} opts;
static bool s_pAltJitExcludeAssembliesListInitialized;
@@ -10977,9 +10969,10 @@ class Compiler
size_t compInfoBlkSize;
BYTE* compInfoBlkAddr;
- EHblkDsc* compHndBBtab = nullptr; // array of EH data
- unsigned compHndBBtabCount = 0; // element count of used elements in EH data array
- unsigned compHndBBtabAllocCount = 0; // element count of allocated elements in EH data array
+ EHblkDsc* compHndBBtab = nullptr; // array of EH data
+ unsigned compHndBBtabCount = 0; // element count of used elements in EH data array
+ unsigned compHndBBtabAllocCount = 0; // element count of allocated elements in EH data array
+ unsigned short compEHID = 0; // unique ID for EH data array entries
#if defined(FEATURE_EH_WINDOWS_X86)
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index a1e706fde6083e..ec44e86e45ef04 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -1110,6 +1110,31 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask, var_typ
return regNum;
}
+//------------------------------------------------------------------------------
+// genFirstRegNumFromMaskAndToggle : Maps the first bit set in the register mask to a
+//      register number and also toggles that bit in the `mask`.
+//
+// Arguments:
+//    mask - the register mask
+//
+// Return Value:
+//    The number of the first register contained in the mask; `mask` is updated to toggle
+//    that bit.
+//
+
+inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask)
+{
+ assert(mask != RBM_NONE); // Must have one bit set, so can't have a mask of zero
+
+ /* Convert the mask to a register number */
+
+ regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
+
+ mask ^= genSingleTypeRegMask(regNum);
+
+ return regNum;
+}
+
/*****************************************************************************
*
* Return the size in bytes of the given type.
diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp
index 844263d400cfdb..0461a12d181d3a 100644
--- a/src/coreclr/jit/decomposelongs.cpp
+++ b/src/coreclr/jit/decomposelongs.cpp
@@ -78,11 +78,11 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block)
// Return Value:
// None.
//
-void DecomposeLongs::DecomposeRange(Compiler* compiler, LIR::Range& range)
+void DecomposeLongs::DecomposeRange(Compiler* compiler, Lowering* lowering, LIR::Range& range)
{
assert(compiler != nullptr);
- DecomposeLongs decomposer(compiler);
+ DecomposeLongs decomposer(compiler, lowering);
    decomposer.m_range = &range;
decomposer.DecomposeRangeHelper();
@@ -90,7 +90,7 @@ void DecomposeLongs::DecomposeRange(Compiler* compiler, LIR::Range& range)
//------------------------------------------------------------------------
// DecomposeLongs::DecomposeRangeHelper:
-// Decompiose each node in the current range.
+// Decompose each node in the current range.
//
// Decomposition is done as an execution-order walk. Decomposition of
// a particular node can create new nodes that need to be further
@@ -122,44 +122,76 @@ void DecomposeLongs::DecomposeRangeHelper()
GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
{
// Handle the case where we are implicitly using the lower half of a long lclVar.
- if ((tree->TypeGet() == TYP_INT) && tree->OperIsLocal())
+ if (tree->TypeIs(TYP_INT) && tree->OperIsLocal())
{
LclVarDsc* varDsc = m_compiler->lvaGetDesc(tree->AsLclVarCommon());
if (varTypeIsLong(varDsc) && varDsc->lvPromoted)
{
-#ifdef DEBUG
- if (m_compiler->verbose)
- {
- printf("Changing implicit reference to lo half of long lclVar to an explicit reference of its promoted "
- "half:\n");
- m_compiler->gtDispTreeRange(Range(), tree);
- }
-#endif // DEBUG
+ JITDUMP("Changing implicit reference to lo half of long lclVar to an explicit reference of its promoted "
+ "half:\n");
+ DISPTREERANGE(Range(), tree);
+
unsigned loVarNum = varDsc->lvFieldLclStart;
tree->AsLclVarCommon()->SetLclNum(loVarNum);
return tree->gtNext;
}
}
- if (tree->TypeGet() != TYP_LONG)
+ if (!tree->TypeIs(TYP_LONG))
{
return tree->gtNext;
}
-#ifdef DEBUG
- if (m_compiler->verbose)
- {
- printf("Decomposing TYP_LONG tree. BEFORE:\n");
- m_compiler->gtDispTreeRange(Range(), tree);
- }
-#endif // DEBUG
-
LIR::Use use;
if (!Range().TryGetUse(tree, &use))
{
LIR::Use::MakeDummyUse(Range(), tree, &use);
}
+#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86)
+ if (!use.IsDummyUse())
+ {
+ // HWIntrinsics can consume/produce a long directly, provided its source/target is memory.
+ // Here we do a conservative check for specific cases where it is certain the load/store
+ // can be contained. In those cases, we can skip decomposition.
+
+ GenTree* user = use.User();
+
+ if (user->OperIsHWIntrinsic())
+ {
+ if (tree->OperIs(GT_CNS_LNG) ||
+ (tree->OperIs(GT_IND, GT_LCL_FLD) && m_lowering->IsSafeToContainMem(user, tree)))
+ {
+ NamedIntrinsic intrinsicId = user->AsHWIntrinsic()->GetHWIntrinsicId();
+ assert(HWIntrinsicInfo::IsVectorCreate(intrinsicId) ||
+ HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) ||
+ HWIntrinsicInfo::IsVectorCreateScalarUnsafe(intrinsicId));
+
+ return tree->gtNext;
+ }
+ }
+ else if (user->OperIs(GT_STOREIND) && tree->OperIsHWIntrinsic() && m_compiler->opts.OptimizationEnabled())
+ {
+ NamedIntrinsic intrinsicId = tree->AsHWIntrinsic()->GetHWIntrinsicId();
+ if (HWIntrinsicInfo::IsVectorToScalar(intrinsicId) && m_lowering->IsSafeToContainMem(user, tree))
+ {
+ return tree->gtNext;
+ }
+ }
+ }
+
+ if (tree->OperIs(GT_STOREIND) && tree->AsStoreInd()->Data()->OperIsHWIntrinsic())
+ {
+ // We should only get here if we matched the second pattern above.
+ assert(HWIntrinsicInfo::IsVectorToScalar(tree->AsStoreInd()->Data()->AsHWIntrinsic()->GetHWIntrinsicId()));
+
+ return tree->gtNext;
+ }
+#endif // FEATURE_HW_INTRINSICS && TARGET_X86
+
+ JITDUMP("Decomposing TYP_LONG tree. BEFORE:\n");
+ DISPTREERANGE(Range(), tree);
+
GenTree* nextNode = nullptr;
switch (tree->OperGet())
{
@@ -270,19 +302,14 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
// If we replaced the argument to a GT_FIELD_LIST element with a GT_LONG node, split that field list
// element into two elements: one for each half of the GT_LONG.
- if ((use.Def()->OperGet() == GT_LONG) && !use.IsDummyUse() && (use.User()->OperGet() == GT_FIELD_LIST))
+ if (use.Def()->OperIs(GT_LONG) && !use.IsDummyUse() && use.User()->OperIs(GT_FIELD_LIST))
{
DecomposeFieldList(use.User()->AsFieldList(), use.Def()->AsOp());
}
-#ifdef DEBUG
- if (m_compiler->verbose)
- {
- // NOTE: st_lcl_var doesn't dump properly afterwards.
- printf("Decomposing TYP_LONG tree. AFTER:\n");
- m_compiler->gtDispTreeRange(Range(), use.Def());
- }
-#endif
+ // NOTE: st_lcl_var doesn't dump properly afterwards.
+ JITDUMP("Decomposing TYP_LONG tree. AFTER:\n");
+ DISPTREERANGE(Range(), use.Def());
// When casting from a decomposed long to a smaller integer we can discard the high part.
if (m_compiler->opts.OptimizationEnabled() && !use.IsDummyUse() && use.User()->OperIs(GT_CAST) &&
@@ -1707,6 +1734,13 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use)
return DecomposeHWIntrinsicGetElement(use, hwintrinsicTree);
}
+ case NI_Vector128_ToScalar:
+ case NI_Vector256_ToScalar:
+ case NI_Vector512_ToScalar:
+ {
+ return DecomposeHWIntrinsicToScalar(use, hwintrinsicTree);
+ }
+
case NI_EVEX_MoveMask:
{
return DecomposeHWIntrinsicMoveMask(use, hwintrinsicTree);
@@ -1751,9 +1785,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW
{
assert(node == use.Def());
assert(varTypeIsLong(node));
- assert((node->GetHWIntrinsicId() == NI_Vector128_GetElement) ||
- (node->GetHWIntrinsicId() == NI_Vector256_GetElement) ||
- (node->GetHWIntrinsicId() == NI_Vector512_GetElement));
+ assert(HWIntrinsicInfo::IsVectorGetElement(node->GetHWIntrinsicId()));
GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);
@@ -1835,6 +1867,75 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
+//------------------------------------------------------------------------
+// DecomposeHWIntrinsicToScalar: Decompose GT_HWINTRINSIC -- NI_Vector*_ToScalar.
+//
+// create:
+//
+// tmp_simd_var = simd_var
+// lo_result = GT_HWINTRINSIC{ToScalar}[int](tmp_simd_var)
+// hi_result = GT_HWINTRINSIC{GetElement}[int](tmp_simd_var, 1)
+// - or -
+// GT_HWINTRINSIC{ToScalar}[int](GT_RSZ(tmp_simd_var, 32))
+// return: GT_LONG(lo_result, hi_result)
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+// node - the hwintrinsic node to decompose
+//
+// Return Value:
+// The GT_LONG node wrapping the upper and lower halves.
+//
+GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIntrinsic* node)
+{
+ assert(node == use.Def());
+ assert(varTypeIsLong(node));
+ assert(HWIntrinsicInfo::IsVectorToScalar(node->GetHWIntrinsicId()));
+
+ GenTree* op1 = node->Op(1);
+ NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
+ var_types simdBaseType = node->GetSimdBaseType();
+ unsigned simdSize = node->GetSimdSize();
+
+ assert(varTypeIsLong(simdBaseType));
+ assert(varTypeIsSIMD(op1));
+
+ GenTree* simdTmpVar = RepresentOpAsLocalVar(op1, node, &node->Op(1));
+ unsigned simdTmpVarNum = simdTmpVar->AsLclVarCommon()->GetLclNum();
+ JITDUMP("[DecomposeHWIntrinsicToScalar]: Saving op1 tree to a temp var:\n");
+ DISPTREERANGE(Range(), simdTmpVar);
+
+ GenTree* loResult = m_compiler->gtNewSimdToScalarNode(TYP_INT, simdTmpVar, CORINFO_TYPE_INT, simdSize);
+ Range().InsertAfter(simdTmpVar, loResult);
+
+ simdTmpVar = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTmpVar->TypeGet());
+ Range().InsertAfter(loResult, simdTmpVar);
+
+ GenTree* hiResult;
+ if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ GenTree* one = m_compiler->gtNewIconNode(1);
+ hiResult = m_compiler->gtNewSimdGetElementNode(TYP_INT, simdTmpVar, one, CORINFO_TYPE_INT, simdSize);
+
+ Range().InsertAfter(simdTmpVar, one, hiResult);
+ }
+ else
+ {
+ assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_SSE2));
+
+ GenTree* thirtyTwo = m_compiler->gtNewIconNode(32);
+ GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo,
+ node->GetSimdBaseJitType(), simdSize);
+ hiResult = m_compiler->gtNewSimdToScalarNode(TYP_INT, shift, CORINFO_TYPE_INT, simdSize);
+
+ Range().InsertAfter(simdTmpVar, thirtyTwo, shift, hiResult);
+ }
+
+ Range().Remove(node);
+
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+}
+
//------------------------------------------------------------------------
// DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_EVEX_MoveMask
//
@@ -2262,6 +2363,13 @@ void DecomposeLongs::TryPromoteLongVar(unsigned lclNum)
{
return;
}
+#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86)
+ if (varDsc->lvIsParam)
+ {
+ // Promotion blocks combined read optimizations for SIMD loads of long params
+ return;
+ }
+#endif // FEATURE_HW_INTRINSICS && TARGET_X86
varDsc->lvFieldCnt = 2;
varDsc->lvFieldLclStart = m_compiler->lvaCount;
diff --git a/src/coreclr/jit/decomposelongs.h b/src/coreclr/jit/decomposelongs.h
index 02681322a552e9..e879292abf4996 100644
--- a/src/coreclr/jit/decomposelongs.h
+++ b/src/coreclr/jit/decomposelongs.h
@@ -14,19 +14,21 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#define _DECOMPOSELONGS_H_
#include "compiler.h"
+#include "lower.h"
class DecomposeLongs
{
public:
- DecomposeLongs(Compiler* compiler)
+ DecomposeLongs(Compiler* compiler, Lowering* lowering)
: m_compiler(compiler)
+ , m_lowering(lowering)
{
}
void PrepareForDecomposition();
void DecomposeBlock(BasicBlock* block);
- static void DecomposeRange(Compiler* compiler, LIR::Range& range);
+ static void DecomposeRange(Compiler* compiler, Lowering* lowering, LIR::Range& range);
private:
inline LIR::Range& Range() const
@@ -64,6 +66,7 @@ class DecomposeLongs
#ifdef FEATURE_HW_INTRINSICS
GenTree* DecomposeHWIntrinsic(LIR::Use& use);
GenTree* DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHWIntrinsic* node);
+ GenTree* DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIntrinsic* node);
GenTree* DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIntrinsic* node);
#endif // FEATURE_HW_INTRINSICS
@@ -80,6 +83,7 @@ class DecomposeLongs
// Data
Compiler* m_compiler;
+ Lowering* m_lowering;
LIR::Range* m_range;
};
diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp
index 3d251f4e7bd192..9df0a0c29e56be 100644
--- a/src/coreclr/jit/ee_il_dll.cpp
+++ b/src/coreclr/jit/ee_il_dll.cpp
@@ -29,9 +29,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#define DLLEXPORT
#endif // !DLLEXPORT
-#if defined(HOST_ANDROID)
-#include
-#endif
+#include "minipal/log.h"
/*****************************************************************************/
@@ -150,16 +148,19 @@ FILE* jitstdout()
// Like printf/logf, but only outputs to jitstdout -- skips call back into EE.
int jitprintf(const char* fmt, ...)
{
+ int status;
va_list vl;
va_start(vl, fmt);
-#if defined(HOST_ANDROID)
- int status = jitstdout() == procstdout()
- ? __android_log_vprint(ANDROID_LOG_VERBOSE, MAIN_CLR_MODULE_NAME_A, fmt, vl)
- : vfprintf(jitstdout(), fmt, vl);
-#else
- int status = vfprintf(jitstdout(), fmt, vl);
-#endif
+ if (jitstdout() == procstdout())
+ {
+ status = minipal_log_vprint_verbose(fmt, vl);
+ }
+ else
+ {
+ status = vfprintf(jitstdout(), fmt, vl);
+ }
va_end(vl);
+
return status;
}
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 5574fc0b439194..ef1fd2f701fc15 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -1403,6 +1403,13 @@ class emitter
{
return idHasMemGenWrite() || idHasMemStkWrite() || idHasMemAdrWrite();
}
+
+ bool idHasMemAndCns() const
+ {
+ assert((unsigned)idInsFmt() < emitFmtCount);
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[idInsFmt()];
+ return ((idOp == ID_OP_CNS) || (idOp == ID_OP_DSP_CNS) || (idOp == ID_OP_AMD_CNS));
+ }
#endif // defined(TARGET_XARCH)
#ifdef TARGET_ARMARCH
insOpts idInsOpt() const
@@ -4060,10 +4067,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast)
// which case we load either a scalar or full vector; otherwise,
// we load a 128-bit vector
- assert((unsigned)id->idInsFmt() < emitFmtCount);
- ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
-
- if ((idOp != ID_OP_CNS) && (idOp != ID_OP_SCNS) && (idOp != ID_OP_DSP_CNS) && (idOp != ID_OP_AMD_CNS))
+ if (!id->idHasMemAndCns())
{
memSize = 16;
}
@@ -4098,10 +4102,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast)
// Embedded broadcast is never supported so if we have a cns operand
// we load a full vector; otherwise, we load a 128-bit vector
- assert((unsigned)id->idInsFmt() < emitFmtCount);
- ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
-
- if ((idOp != ID_OP_CNS) && (idOp != ID_OP_SCNS) && (idOp != ID_OP_DSP_CNS) && (idOp != ID_OP_AMD_CNS))
+ if (!id->idHasMemAndCns())
{
memSize = 16;
}
diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp
index 2b008b2f669e6f..0c5e32c9dbda59 100644
--- a/src/coreclr/jit/emitriscv64.cpp
+++ b/src/coreclr/jit/emitriscv64.cpp
@@ -92,7 +92,6 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const
return sizeof(instrDesc);
}
- case INS_OPTS_I:
case INS_OPTS_RC:
case INS_OPTS_RL:
case INS_OPTS_RELOC:
@@ -2978,56 +2977,6 @@ BYTE* emitter::emitOutputInstr_OptsReloc(BYTE* dst, const instrDesc* id, instruc
return dst;
}
-BYTE* emitter::emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id)
-{
- ssize_t immediate = reinterpret_cast(id->idAddr()->iiaAddr);
- const regNumber reg1 = id->idReg1();
-
- switch (id->idCodeSize())
- {
- case 8:
- return emitOutputInstr_OptsI8(dst, id, immediate, reg1);
- case 32:
- return emitOutputInstr_OptsI32(dst, immediate, reg1);
- default:
- break;
- }
- unreached();
- return nullptr;
-}
-
-BYTE* emitter::emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1)
-{
- if (id->idReg2())
- {
- // special for INT64_MAX or UINT32_MAX
- dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, REG_R0, WordMask(12));
- const unsigned shiftValue = (immediate == INT64_MAX) ? 1 : 32;
- dst += emitOutput_ITypeInstr(dst, INS_srli, reg1, reg1, shiftValue);
- }
- else
- {
- dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(immediate));
- dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(immediate));
- }
- return dst;
-}
-
-BYTE* emitter::emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1)
-{
- const unsigned upperWord = UpperWordOfDoubleWord(immediate);
- dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(upperWord));
- dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(upperWord));
- const unsigned lowerWord = LowerWordOfDoubleWord(immediate);
- dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11);
- dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 21));
- dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11);
- dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 10));
- dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 10);
- dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<10>(lowerWord));
- return dst;
-}
-
BYTE* emitter::emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins)
{
assert(id->idAddr()->iiaIsJitDataOffset());
@@ -3282,11 +3231,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst = emitOutputInstr_OptsReloc(dst, id, &ins);
sz = sizeof(instrDesc);
break;
- case INS_OPTS_I:
- dst = emitOutputInstr_OptsI(dst, id);
- ins = INS_addi;
- sz = sizeof(instrDesc);
- break;
case INS_OPTS_RC:
dst = emitOutputInstr_OptsRc(dst, id, &ins);
sz = sizeof(instrDesc);
@@ -3366,31 +3310,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
if (vt == TYP_REF || vt == TYP_BYREF)
emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
}
- // if (emitInsWritesToLclVarStackLocPair(id))
- //{
- // unsigned ofs2 = ofs + TARGET_POINTER_SIZE;
- // if (id->idGCrefReg2() != GCT_NONE)
- // {
- // emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp);
- // }
- // else
- // {
- // // If the type of the local is a gc ref type, update the liveness.
- // var_types vt;
- // if (varNum >= 0)
- // {
- // // "Regular" (non-spill-temp) local.
- // vt = var_types(emitComp->lvaTable[varNum].lvType);
- // }
- // else
- // {
- // TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
- // vt = tmpDsc->tdTempType();
- // }
- // if (vt == TYP_REF || vt == TYP_BYREF)
- // emitGCvarDeadUpd(adr + ofs2, *dp);
- // }
- //}
}
#ifdef DEBUG
@@ -3543,6 +3462,22 @@ bool emitter::emitDispBranch(
void emitter::emitDispIllegalInstruction(code_t instructionCode)
{
printf("RISCV64 illegal instruction: 0x%08X\n", instructionCode);
+ assert(!"RISCV64 illegal instruction");
+}
+
+void emitter::emitDispImmediate(ssize_t imm, bool newLine /*= true*/, unsigned regBase /*= REG_ZERO*/)
+{
+ if (emitComp->opts.disDiffable && (regBase != REG_FP) && (regBase != REG_SP))
+ {
+ printf("0xD1FFAB1E");
+ }
+ else
+ {
+ printf("%zd", imm);
+ }
+
+ if (newLine)
+ printf("\n");
}
//----------------------------------------------------------------------------------------
@@ -3587,7 +3522,8 @@ void emitter::emitDispInsName(
{
imm20 |= 0xfff00000;
}
- printf("lui %s, %d\n", rd, imm20);
+ printf("lui %s, ", rd);
+ emitDispImmediate(imm20);
return;
}
case MajorOpcode::Auipc:
@@ -3598,7 +3534,8 @@ void emitter::emitDispInsName(
{
imm20 |= 0xfff00000;
}
- printf("auipc %s, %d\n", rd, imm20);
+ printf("auipc %s, ", rd);
+ emitDispImmediate(imm20);
return;
}
case MajorOpcode::OpImm:
@@ -3684,10 +3621,18 @@ void emitter::emitDispInsName(
assert(printLength > 0);
int paddingLength = kMaxInstructionLength - printLength;
- printf("%*s %s, %s", paddingLength, "", RegNames[rd], RegNames[rs1]);
+ printf("%*s %s, %s, ", paddingLength, "", RegNames[rd], RegNames[rs1]);
if (hasImmediate)
{
- printf(isHex ? ", 0x%x" : ", %d", imm12);
+ if (opcode2 == 0x0) // ADDI
+ {
+ assert(!isHex);
+ emitDispImmediate(imm12, false, rs1);
+ }
+ else
+ {
+ printf(isHex ? "0x%x" : "%d", imm12);
+ }
}
printf("\n");
@@ -3698,11 +3643,7 @@ void emitter::emitDispInsName(
unsigned int opcode2 = (code >> 12) & 0x7;
const char* rd = RegNames[(code >> 7) & 0x1f];
const char* rs1 = RegNames[(code >> 15) & 0x1f];
- int imm12 = (((int)code) >> 20); // & 0xfff;
- // if (imm12 & 0x800)
- //{
- // imm12 |= 0xfffff000;
- //}
+ int imm12 = (((int)code) >> 20);
switch (opcode2)
{
case 0x0: // ADDIW & SEXT.W
@@ -3712,7 +3653,8 @@ void emitter::emitDispInsName(
}
else
{
- printf("addiw %s, %s, %d\n", rd, rs1, imm12);
+ printf("addiw %s, %s, ", rd, rs1);
+ emitDispImmediate(imm12);
}
return;
case 0x1: // SLLIW
@@ -3912,44 +3854,29 @@ void emitter::emitDispInsName(
case MajorOpcode::Store:
{
unsigned int opcode2 = (code >> 12) & 0x7;
- const char* rs1 = RegNames[(code >> 15) & 0x1f];
- const char* rs2 = RegNames[(code >> 20) & 0x1f];
- int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f);
+ if (opcode2 >= 4)
+ return emitDispIllegalInstruction(code);
+
+ unsigned rs1Num = (code >> 15) & 0x1f;
+ const char* rs1 = RegNames[rs1Num];
+ const char* rs2 = RegNames[(code >> 20) & 0x1f];
+ int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f);
if (offset & 0x800)
{
offset |= 0xfffff000;
}
- switch (opcode2)
- {
- case 0: // SB
- printf("sb %s, %d(%s)\n", rs2, offset, rs1);
- return;
- case 1: // SH
- printf("sh %s, %d(%s)\n", rs2, offset, rs1);
- return;
- case 2: // SW
- printf("sw %s, %d(%s)\n", rs2, offset, rs1);
- return;
- case 3: // SD
- printf("sd %s, %d(%s)\n", rs2, offset, rs1);
- return;
- default:
- printf("RISCV64 illegal instruction: 0x%08X\n", code);
- return;
- }
+ char width = "bhwd"[opcode2];
+ printf("s%c %s, ", width, rs2);
+ emitDispImmediate(offset, false, rs1Num);
+ printf("(%s)\n", rs1);
+ return;
}
case MajorOpcode::Branch:
{
unsigned opcode2 = (code >> 12) & 0x7;
unsigned rs1 = (code >> 15) & 0x1f;
unsigned rs2 = (code >> 20) & 0x1f;
- // int offset = (((code >> 31) & 0x1) << 12) | (((code >> 7) & 0x1) << 11) | (((code >> 25) & 0x3f) << 5) |
- // (((code >> 8) & 0xf) << 1);
- // if (offset & 0x800)
- // {
- // offset |= 0xfffff000;
- // }
if (!emitDispBranch(opcode2, rs1, rs2, id, ig))
{
emitDispIllegalInstruction(code);
@@ -3959,7 +3886,8 @@ void emitter::emitDispInsName(
case MajorOpcode::Load:
{
unsigned int opcode2 = (code >> 12) & 0x7;
- const char* rs1 = RegNames[(code >> 15) & 0x1f];
+ unsigned rs1Num = (code >> 15) & 0x1f;
+ const char* rs1 = RegNames[rs1Num];
const char* rd = RegNames[(code >> 7) & 0x1f];
int offset = ((code >> 20) & 0xfff);
if (offset & 0x800)
@@ -3967,33 +3895,15 @@ void emitter::emitDispInsName(
offset |= 0xfffff000;
}
- switch (opcode2)
- {
- case 0: // LB
- printf("lb %s, %d(%s)\n", rd, offset, rs1);
- return;
- case 1: // LH
- printf("lh %s, %d(%s)\n", rd, offset, rs1);
- return;
- case 2: // LW
- printf("lw %s, %d(%s)\n", rd, offset, rs1);
- return;
- case 3: // LD
- printf("ld %s, %d(%s)\n", rd, offset, rs1);
- return;
- case 4: // LBU
- printf("lbu %s, %d(%s)\n", rd, offset, rs1);
- return;
- case 5: // LHU
- printf("lhu %s, %d(%s)\n", rd, offset, rs1);
- return;
- case 6: // LWU
- printf("lwu %s, %d(%s)\n", rd, offset, rs1);
- return;
- default:
- printf("RISCV64 illegal instruction: 0x%08X\n", code);
- return;
- }
+ char width = "bhwd"[opcode2 & 0b011];
+ char unsign = ((opcode2 & 0b100) != 0) ? 'u' : ' ';
+ if (width == 'd' && unsign == 'u')
+ return emitDispIllegalInstruction(code);
+
+ printf("l%c%c %s, ", width, unsign, rd);
+ emitDispImmediate(offset, false, rs1Num);
+ printf("(%s)\n", rs1);
+ return;
}
case MajorOpcode::Jalr:
{
@@ -4005,13 +3915,23 @@ void emitter::emitDispInsName(
offset |= 0xfffff000;
}
- if ((rs1 == REG_RA) && (rd == REG_ZERO))
+ if ((offset == 0) && (rs1 == REG_RA) && (rd == REG_ZERO))
{
printf("ret");
return;
}
- printf("jalr %s, %d(%s)", RegNames[rd], offset, RegNames[rs1]);
+ if ((offset == 0) && ((rd == REG_RA) || (rd == REG_ZERO)))
+ {
+ const char* name = (rd == REG_RA) ? "jalr" : "jr ";
+ printf("%s %s", name, RegNames[rs1]);
+ }
+ else
+ {
+ printf("jalr %s, ", RegNames[rd]);
+ emitDispImmediate(offset, false);
+ printf("(%s)", RegNames[rs1]);
+ }
CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie;
// Target for ret call is unclear, e.g.:
// jalr zero, 0(ra)
@@ -4034,9 +3954,10 @@ void emitter::emitDispInsName(
{
offset |= 0xfff00000;
}
- if (rd == REG_ZERO)
+ if ((rd == REG_ZERO) || (rd == REG_RA))
{
- printf("j ");
+ const char* name = (rd == REG_RA) ? "jal" : "j ";
+ printf("%s ", name);
if (id->idIsBound())
{
@@ -4044,12 +3965,15 @@ void emitter::emitDispInsName(
}
else
{
- printf("pc%+d instructions", offset >> 2);
+ printf("pc%s", (offset >= 0) ? "+" : "");
+ emitDispImmediate(offset / (int)sizeof(code_t), false);
+ printf(" instructions");
}
}
else
{
- printf("jal %s, %d", RegNames[rd], offset);
+ printf("jal %s, ", RegNames[rd]);
+ emitDispImmediate(offset, false);
}
CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie;
if (handle != 0)
@@ -4430,50 +4354,43 @@ void emitter::emitDispInsName(
case MajorOpcode::StoreFp:
{
unsigned int opcode2 = (code >> 12) & 0x7;
+ if ((opcode2 != 2) && (opcode2 != 3))
+ return emitDispIllegalInstruction(code);
- const char* rs1 = RegNames[(code >> 15) & 0x1f];
+ unsigned rs1Num = (code >> 15) & 0x1f;
+ const char* rs1 = RegNames[rs1Num];
const char* rs2 = RegNames[((code >> 20) & 0x1f) | 0x20];
int offset = (((code >> 25) & 0x7f) << 5) | ((code >> 7) & 0x1f);
if (offset & 0x800)
{
offset |= 0xfffff000;
}
- if (opcode2 == 2) // FSW
- {
- printf("fsw %s, %d(%s)\n", rs2, offset, rs1);
- }
- else if (opcode2 == 3) // FSD
- {
- printf("fsd %s, %d(%s)\n", rs2, offset, rs1);
- }
- else
- {
- NYI_RISCV64("illegal ins within emitDisInsName!");
- }
+
+ char width = "bhwd"[opcode2];
+ printf("fs%c %s, ", width, rs2);
+ emitDispImmediate(offset, false, rs1Num);
+ printf("(%s)\n", rs1);
return;
}
case MajorOpcode::LoadFp:
{
unsigned int opcode2 = (code >> 12) & 0x7;
- const char* rs1 = RegNames[(code >> 15) & 0x1f];
- const char* rd = RegNames[((code >> 7) & 0x1f) | 0x20];
- int offset = ((code >> 20) & 0xfff);
+ if ((opcode2 != 2) && (opcode2 != 3))
+ return emitDispIllegalInstruction(code);
+
+ unsigned rs1Num = (code >> 15) & 0x1f;
+ const char* rs1 = RegNames[rs1Num];
+ const char* rd = RegNames[((code >> 7) & 0x1f) | 0x20];
+ int offset = ((code >> 20) & 0xfff);
if (offset & 0x800)
{
offset |= 0xfffff000;
}
- if (opcode2 == 2) // FLW
- {
- printf("flw %s, %d(%s)\n", rd, offset, rs1);
- }
- else if (opcode2 == 3) // FLD
- {
- printf("fld %s, %d(%s)\n", rd, offset, rs1);
- }
- else
- {
- NYI_RISCV64("illegal ins within emitDisInsName!");
- }
+
+ char width = "bhwd"[opcode2];
+ printf("fl%c %s, ", width, rd);
+ emitDispImmediate(offset, false, rs1Num);
+ printf("(%s)\n", rs1);
return;
}
case MajorOpcode::Amo:
diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h
index 5006645c74a0bb..bcc87538f3b18d 100644
--- a/src/coreclr/jit/emitriscv64.h
+++ b/src/coreclr/jit/emitriscv64.h
@@ -68,6 +68,7 @@ void emitDispBranchOffset(const instrDesc* id, const insGroup* ig) const;
void emitDispBranchLabel(const instrDesc* id) const;
bool emitDispBranchInstrType(unsigned opcode2, bool is_zero_reg, bool& print_second_reg) const;
void emitDispIllegalInstruction(code_t instructionCode);
+void emitDispImmediate(ssize_t imm, bool newLine = true, unsigned regBase = REG_ZERO);
emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/) const;
@@ -119,9 +120,6 @@ unsigned emitOutput_BTypeInstr_InvertComparation(
unsigned emitOutput_JTypeInstr(BYTE* dst, instruction ins, regNumber rd, unsigned imm21) const;
BYTE* emitOutputInstr_OptsReloc(BYTE* dst, const instrDesc* id, instruction* ins);
-BYTE* emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id);
-BYTE* emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1);
-BYTE* emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1);
BYTE* emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins);
BYTE* emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1);
BYTE* emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1);
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 7682b98a3a68ab..178d1b0232b20e 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -359,12 +359,13 @@ bool emitter::IsApxNFEncodableInstruction(instruction ins) const
//
bool emitter::IsApxExtendedEvexInstruction(instruction ins) const
{
+#ifdef TARGET_AMD64
if (!UsePromotedEVEXEncoding())
{
return false;
}
- if (HasApxNdd(ins) || HasApxNf(ins))
+ if (HasApxNdd(ins) || HasApxNf(ins) || (ins == INS_crc32_apx))
{
return true;
}
@@ -375,6 +376,9 @@ bool emitter::IsApxExtendedEvexInstruction(instruction ins) const
}
return false;
+#else // !TARGET_AMD64
+ return false;
+#endif
}
//------------------------------------------------------------------------
@@ -1711,6 +1715,14 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasMaskReg(id))
{
// Requires the EVEX encoding due to used registers
+ // A special case here is KMOV, the original KMOV introduced in Avx512 can only be encoded in VEX, APX promoted
+ // them to EVEX, so only return true when APX is available.
+ if ((ins == INS_kmovb_msk) || (ins == INS_kmovw_msk) || (ins == INS_kmovd_msk) || (ins == INS_kmovq_msk) ||
+ (ins == INS_kmovb_gpr) || (ins == INS_kmovw_gpr) || (ins == INS_kmovd_gpr) || (ins == INS_kmovq_gpr))
+ {
+ // Use EVEX only when needed.
+ return HasExtendedGPReg(id);
+ }
return true;
}
@@ -1720,6 +1732,14 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
return true;
}
+ if (HasExtendedGPReg(id))
+ {
+ // TODO-XArch-apx:
+ // revisit this part: this may have some conflicts with REX2 prefix, we may prefer REX2 if only EGPR is
+ // involved.
+ return true;
+ }
+
if (id->idIsEvexNfContextSet() && IsBMIInstruction(ins))
{
// Only a few BMI instructions shall be promoted to APX-EVEX due to NF feature.
@@ -1759,6 +1779,15 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
return id->idHasMem();
}
+ if ((insTupleTypeInfo(ins) & INS_TT_MEM128) != 0)
+ {
+ assert((ins == INS_pslld) || (ins == INS_psllq) || (ins == INS_psllw) || (ins == INS_psrad) ||
+ (ins == INS_psraw) || (ins == INS_psrld) || (ins == INS_psrlq) || (ins == INS_psrlw));
+
+ // Memory operand with immediate can only be encoded using EVEX
+ return id->idHasMemAndCns();
+ }
+
return false;
}
@@ -1773,6 +1802,7 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
//
bool emitter::TakesRex2Prefix(const instrDesc* id) const
{
+#ifdef TARGET_AMD64
// Return true iff the instruction supports REX2 encoding, and it requires to access EGPRs.
// TODO-xarch-apx:
@@ -1803,6 +1833,9 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
#endif // DEBUG
return false;
+#else // !TARGET_AMD64
+ return false;
+#endif
}
//------------------------------------------------------------------------
@@ -1816,9 +1849,7 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
//
bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const
{
- // TODO-XArch-APX:
- // Isolating legacy-promoted-EVEX case out from VEX/EVEX-promoted-EVEX,
- // as the latter ones are relatively simple, providing EGPRs functionality,
+#ifdef TARGET_AMD64
instruction ins = id->idIns();
if (!IsApxExtendedEvexInstruction(ins))
{
@@ -1846,6 +1877,11 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const
return true;
}
+ if (ins == INS_crc32_apx)
+ {
+ return true;
+ }
+
#if defined(DEBUG)
if (emitComp->DoJitStressPromotedEvexEncoding())
{
@@ -1858,6 +1894,9 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const
}
return false;
+#else // !TARGET_AMD64
+ return false;
+#endif
}
// Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section
@@ -2229,11 +2268,7 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
switch (ins)
{
case INS_cvtss2si:
- case INS_cvttss2si32:
- case INS_cvttss2si64:
case INS_cvtsd2si:
- case INS_cvttsd2si32:
- case INS_cvttsd2si64:
case INS_movd:
case INS_movnti:
case INS_andn:
@@ -2246,11 +2281,9 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
case INS_pdep:
case INS_pext:
case INS_rorx:
-#if defined(TARGET_AMD64)
case INS_sarx:
case INS_shlx:
case INS_shrx:
-#endif // TARGET_AMD64
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
{
@@ -2387,7 +2420,12 @@ bool emitter::HasMaskReg(const instrDesc* id) const
}
#if defined(DEBUG)
- assert(!isMaskReg(id->idReg2()));
+ // After APX, KMOV instructions can be encoded in EVEX.
+ if (isMaskReg(id->idReg2()))
+ {
+ assert(IsKInstruction(id->idIns()));
+ return UsePromotedEVEXEncoding();
+ }
if (!id->idIsSmallDsc())
{
@@ -2733,7 +2771,7 @@ emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
emitter::code_t emitter::AddEvexVPrimePrefix(code_t code)
{
#if defined(TARGET_AMD64)
- assert(UseEvexEncoding() && hasEvexPrefix(code));
+ assert((UseEvexEncoding() || UsePromotedEVEXEncoding()) && hasEvexPrefix(code));
return emitter::code_t(code & 0xFFFFFFF7FFFFFFFFULL);
#else
unreached();
@@ -2753,7 +2791,7 @@ emitter::code_t emitter::AddEvexVPrimePrefix(code_t code)
emitter::code_t emitter::AddEvexRPrimePrefix(code_t code)
{
#if defined(TARGET_AMD64)
- assert(UseEvexEncoding() && hasEvexPrefix(code));
+ assert((UseEvexEncoding() || UsePromotedEVEXEncoding()) && hasEvexPrefix(code));
return emitter::code_t(code & 0xFFEFFFFFFFFFFFFFULL);
#else
unreached();
@@ -2822,13 +2860,38 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
{
case 0x66:
{
- // None of the existing BMI instructions should be EVEX encoded.
- // After APX, BMI instructions can be EVEX encoded with NF feature.
+ // After APX, BMI instructions can be encoded in EVEX.
if (IsBMIInstruction(ins))
{
- // if BMI instructions reaches this part, then it should be APX-EVEX.
- // although the opcode of all the BMI instructions are defined with 0x66,
- // but it should not, skip this check.
+ switch (ins)
+ {
+ case INS_rorx:
+ case INS_pdep:
+ case INS_mulx:
+ case INS_shrx:
+ {
+ evexPrefix |= (0x03 << 8);
+ break;
+ }
+
+ case INS_pext:
+ case INS_sarx:
+ {
+ evexPrefix |= (0x02 << 8);
+ break;
+ }
+
+ case INS_shlx:
+ {
+ evexPrefix |= (0x01 << 8);
+ break;
+ }
+
+ default:
+ {
+ break;
+ }
+ }
break;
}
assert(!IsBMIInstruction(ins));
@@ -3012,32 +3075,25 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con
case INS_rorx:
case INS_pdep:
case INS_mulx:
-// TODO: Unblock when enabled for x86
-#ifdef TARGET_AMD64
case INS_shrx:
-#endif
{
vexPrefix |= 0x03;
break;
}
case INS_pext:
-// TODO: Unblock when enabled for x86
-#ifdef TARGET_AMD64
case INS_sarx:
-#endif
{
vexPrefix |= 0x02;
break;
}
-// TODO: Unblock when enabled for x86
-#ifdef TARGET_AMD64
+
case INS_shlx:
{
vexPrefix |= 0x01;
break;
}
-#endif
+
default:
{
vexPrefix |= 0x00;
@@ -3755,11 +3811,9 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_pextrw:
case INS_pextrw_sse41:
case INS_rorx:
-#ifdef TARGET_AMD64
case INS_shlx:
case INS_sarx:
case INS_shrx:
-#endif
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi32:
@@ -4008,10 +4062,8 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const
switch (ins)
{
case INS_crc32:
-#if defined(TARGET_AMD64)
case INS_sarx:
case INS_shrx:
-#endif // TARGET_AMD64
{
// When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding
// These are special cases where the pp-bit is 0xF2 or 0xF3 and not 0x66
@@ -4231,8 +4283,19 @@ inline unsigned emitter::insEncodeReg012(const instrDesc* id, regNumber reg, emi
if (IsExtendedGPReg(reg))
{
// Seperate the encoding for REX2.B3/B4, REX2.B3 will be handled in `AddRexBPrefix`.
- assert(TakesRex2Prefix(id));
- *code |= 0x001000000000ULL; // REX2.B4
+ assert(TakesRex2Prefix(id) || TakesApxExtendedEvexPrefix(id) || TakesEvexPrefix(id));
+ if (hasRex2Prefix(*code))
+ {
+ *code |= 0x001000000000ULL; // REX2.B4
+ }
+ else if (hasEvexPrefix(*code))
+ {
+ *code |= 0x8000000000000ULL; // EVEX.B4
+ }
+ else
+ {
+ // There are cases when this method is called before prefix is attached.
+ }
}
}
else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
@@ -4280,8 +4343,19 @@ inline unsigned emitter::insEncodeReg345(const instrDesc* id, regNumber reg, emi
if (IsExtendedGPReg(reg))
{
// Seperate the encoding for REX2.R3/R4, REX2.R3 will be handled in `AddRexRPrefix`.
- assert(TakesRex2Prefix(id));
- *code |= 0x004000000000ULL; // REX2.R4
+ assert(TakesRex2Prefix(id) || TakesApxExtendedEvexPrefix(id) || TakesEvexPrefix(id));
+ if (hasRex2Prefix(*code))
+ {
+ *code |= 0x004000000000ULL; // REX2.R4
+ }
+ else if (hasEvexPrefix(*code))
+ {
+ *code = AddEvexRPrimePrefix(*code); // EVEX.R4
+ }
+ else
+ {
+ // There are cases when this method is called before prefix is attached.
+ }
}
}
else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
@@ -4339,6 +4413,12 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
// Have to set the EVEX V' bit
code = AddEvexVPrimePrefix(code);
}
+
+ if (isHighGPReg(reg) && IsBMIInstruction(ins))
+ {
+ // APX: BMI instructions use RVM operand encoding
+ code = AddEvexVPrimePrefix(code);
+ }
#endif
// Shift count = 5-bytes of opcode + 0-2 bits for EVEX
@@ -4364,7 +4444,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
// Rather see these paths cleaned up.
regBits = HighAwareRegEncoding(reg);
- if (false /*reg >= REG_R16 && reg <= REG_R31*/)
+ if (isHighGPReg(reg))
{
// Have to set the EVEX V' bit
code = AddEvexVPrimePrefix(code);
@@ -4410,8 +4490,21 @@ inline unsigned emitter::insEncodeRegSIB(const instrDesc* id, regNumber reg, cod
if (IsExtendedGPReg(reg))
{
// Separate the encoding for REX2.X3/X4, REX2.X3 will be handled in `AddRexXPrefix`.
- assert(TakesRex2Prefix(id));
- *code |= 0x002000000000ULL; // REX2.X4
+ assert(TakesRex2Prefix(id) || TakesApxExtendedEvexPrefix(id) || TakesEvexPrefix(id));
+ if (hasRex2Prefix(*code))
+ {
+ *code |= 0x002000000000ULL; // REX2.X4
+ }
+ else if (hasEvexPrefix(*code))
+ {
+ // Note that APX-EVEX use EVEX.X4 as the MSB of the INDEX register to address GPRs, and the original
+ // EVEX.V4 is used for VSIB addressing.
+ *code &= 0xFFFFFBFFFFFFFFFFULL; // EVEX.X4
+ }
+ else
+ {
+ // There are cases when this method is called before prefix is attached.
+ }
}
}
unsigned regBits = RegEncoding(reg);
@@ -8512,7 +8605,7 @@ void emitter::emitIns_R_R_C_R(instruction ins,
}
//------------------------------------------------------------------------
-// emitIns_R_R_R_S: emits the code for a instruction that takes a register operand, a variable index +
+// emitIns_R_R_S_R: emits the code for a instruction that takes a register operand, a variable index +
// offset, another register operand, and that returns a value in register
//
// Arguments:
@@ -12780,6 +12873,19 @@ void emitter::emitDispIns(
case IF_RRW_RRD_ARD:
case IF_RWR_RWR_ARD:
{
+ if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx))
+ {
+ // These instructions have their operands swapped to simplify the emitter implementation.
+ // They will appear here as IF_RWR_RRD_ARD but should actually
+ // display as if they were IF_RWR_ARD_RRD.
+
+ printf("%s", emitRegName(id->idReg1(), attr));
+ printf(", %s", sstr);
+ emitDispAddrMode(id);
+ printf(", %s", emitRegName(id->idReg2(), attr));
+ break;
+ }
+
printf("%s", emitRegName(id->idReg1(), attr));
emitDispEmbMasking(id);
printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr);
@@ -13077,6 +13183,20 @@ void emitter::emitDispIns(
case IF_RRW_RRD_SRD:
case IF_RWR_RWR_SRD:
{
+ if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx))
+ {
+ // These instructions have their operands swapped to simplify the emitter implementation.
+ // They will appear here as IF_RWR_RRD_SRD but should actually
+ // display as if they were IF_RWR_SRD_RRD.
+
+ printf("%s", emitRegName(id->idReg1(), attr));
+ printf(", %s", sstr);
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+ printf(", %s", emitRegName(id->idReg2(), attr));
+ break;
+ }
+
printf("%s", emitRegName(id->idReg1(), attr));
emitDispEmbMasking(id);
printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr);
@@ -13303,21 +13423,16 @@ void emitter::emitDispIns(
regNumber reg2 = id->idReg2();
regNumber reg3 = id->idReg3();
- if (ins == INS_bextr || ins == INS_bzhi
-#ifdef TARGET_AMD64
- || ins == INS_shrx || ins == INS_shlx || ins == INS_sarx
-#endif
- )
+ if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx))
{
- // BMI bextr,bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM,
- // which is different from most of other instructions
- regNumber tmp = reg2;
- reg2 = reg3;
- reg3 = tmp;
+ // These instructions have their operands swapped to simplify the emitter implementation.
+ // They encode reg3 in VEX.vvvv and reg2 in modRM, which is opposite most instructions.
+ // We swap them back here so they will display in the correct order.
+ std::swap(reg2, reg3);
}
emitAttr attr3 = attr;
- if (hasTupleTypeInfo(ins) && ((insTupleTypeInfo(ins) & INS_TT_MEM128) != 0))
+ if ((insTupleTypeInfo(ins) & INS_TT_MEM128) != 0)
{
// Shift instructions take xmm for the 3rd operand regardless of instruction size.
attr3 = EA_16BYTE;
@@ -13660,6 +13775,20 @@ void emitter::emitDispIns(
case IF_RRW_RRD_MRD:
case IF_RWR_RWR_MRD:
{
+ if ((ins == INS_bextr) || (ins == INS_bzhi) || (ins == INS_sarx) || (ins == INS_shlx) || (ins == INS_shrx))
+ {
+ // These instructions have their operands swapped to simplify the emitter implementation.
+ // They will appear here as IF_RWR_RRD_MRD but should actually
+ // display as if they were IF_RWR_MRD_RRD.
+
+ printf("%s", emitRegName(id->idReg1(), attr));
+ printf(", %s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ printf(", %s", emitRegName(id->idReg2(), attr));
+ break;
+ }
+
printf("%s", emitRegName(id->idReg1(), attr));
emitDispEmbMasking(id);
printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr);
@@ -14534,6 +14663,12 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
NO_WAY("unexpected size");
break;
}
+#ifdef TARGET_AMD64
+ if (ins == INS_crc32_apx)
+ {
+ code |= (insEncodeReg345(id, id->idReg1(), size, &code) << 8);
+ }
+#endif // TARGET_AMD64
}
// Output the REX prefix
@@ -15356,6 +15491,12 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
dst += emitOutputByte(dst, 0x66);
}
+#ifdef TARGET_AMD64
+ else
+ {
+ code |= EXTENDED_EVEX_PP_BITS;
+ }
+#endif // TARGET_AMD64
}
FALLTHROUGH;
@@ -15401,6 +15542,14 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
NO_WAY("unexpected size");
break;
}
+#ifdef TARGET_AMD64
+ if (ins == INS_crc32_apx)
+ {
+    // The promoted CRC32 uses a 1-byte opcode; unlike other instructions on this path, the register encoding for
+    // CRC32 needs to be done here.
+ code |= (insEncodeReg345(id, id->idReg1(), size, &code) << 8);
+ }
+#endif // TARGET_AMD64
}
// Output the REX prefix
@@ -16513,7 +16662,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
(ins == INS_tzcnt)
#ifdef TARGET_AMD64
- || (ins == INS_lzcnt_apx) || (ins == INS_tzcnt_apx) || (ins == INS_popcnt_apx)
+ || (ins == INS_lzcnt_apx) || (ins == INS_tzcnt_apx) || (ins == INS_popcnt_apx) || (ins == INS_crc32_apx)
#endif // TARGET_AMD64
)
{
@@ -16525,11 +16674,24 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
{
code |= 0x0100;
}
+#ifdef TARGET_AMD64
+ if ((ins == INS_crc32_apx) && (size > EA_1BYTE))
+ {
+ code |= 0x01;
+ }
+#endif // TARGET_AMD64
- if (size == EA_2BYTE && !TakesApxExtendedEvexPrefix(id))
+ if (size == EA_2BYTE)
{
- assert(ins == INS_crc32);
- dst += emitOutputByte(dst, 0x66);
+ if (!TakesApxExtendedEvexPrefix(id))
+ {
+ assert(ins == INS_crc32);
+ dst += emitOutputByte(dst, 0x66);
+ }
+ else
+ {
+ code |= EXTENDED_EVEX_PP_BITS;
+ }
}
else if (size == EA_8BYTE)
{
@@ -16982,14 +17144,13 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
code = insCodeACC(ins);
assert(code < 0x100);
- code |= 0x08; // Set the 'w' bit
- unsigned regcode = insEncodeReg012(id, reg, size, &code);
- code |= regcode;
-
// This is INS_mov and will not take VEX prefix
assert(!TakesVexPrefix(ins));
code = AddX86PrefixIfNeededAndNotPresent(id, code, size);
+ code |= 0x08; // Set the 'w' bit
+ unsigned regcode = insEncodeReg012(id, reg, size, &code);
+ code |= regcode;
if (TakesRexWPrefix(id))
{
@@ -20611,22 +20772,31 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
case INS_movd:
- case INS_movq: // only MOVQ xmm, xmm is different (emitted by Sse2.MoveScalar, should use MOVDQU instead)
+ case INS_movq:
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
- // movd r32, xmm or xmm, r32
- result.insThroughput = PERFSCORE_THROUGHPUT_1C;
- result.insLatency = PERFSCORE_LATENCY_3C;
+ if (isFloatReg(id->idReg1()) && isFloatReg(id->idReg2()))
+ {
+ // movq xmm, xmm
+ result.insThroughput = PERFSCORE_THROUGHPUT_3X;
+ result.insLatency = PERFSCORE_LATENCY_1C;
+ }
+ else
+ {
+ // movd r32/64, xmm or xmm, r32/64
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ }
}
else if (memAccessKind == PERFSCORE_MEMORY_READ)
{
- // movd xmm, m32
+ // ins xmm, m32/64
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_2C;
}
else
{
- // movd m32, xmm
+ // ins m32/64, xmm
assert(memAccessKind == PERFSCORE_MEMORY_WRITE);
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_2C;
@@ -21345,6 +21515,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_popcnt_apx:
case INS_lzcnt_apx:
case INS_tzcnt_apx:
+ case INS_crc32_apx:
#endif // TARGET_AMD64
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
@@ -21753,7 +21924,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}
-#ifdef TARGET_AMD64
case INS_shlx:
case INS_sarx:
case INS_shrx:
@@ -21762,7 +21932,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;
}
-#endif
case INS_vpmovb2m:
case INS_vpmovw2m:
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 93bd131f5a3472..583ff4c8f93bc3 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -297,6 +297,11 @@ bool HasKMaskRegisterDest(instruction ins) const
case INS_vgatherqps:
case INS_vgatherdpd:
case INS_vgatherqpd:
+ // KMOV can be promoted to EVEX with APX.
+ case INS_kmovb_msk:
+ case INS_kmovw_msk:
+ case INS_kmovd_msk:
+ case INS_kmovq_msk:
{
return true;
}
diff --git a/src/coreclr/jit/error.cpp b/src/coreclr/jit/error.cpp
index 5ae6cea056efeb..d04585cef94965 100644
--- a/src/coreclr/jit/error.cpp
+++ b/src/coreclr/jit/error.cpp
@@ -15,6 +15,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#pragma hdrstop
#endif
#include "compiler.h"
+#include "minipal/log.h"
#if MEASURE_FATAL
unsigned fatal_badCode;
@@ -318,7 +319,14 @@ int vflogf(FILE* file, const char* fmt, va_list args)
// 0-length string means flush
if (fmt[0] == '\0')
{
- fflush(file);
+ if (file == procstdout())
+ {
+ minipal_log_flush_verbose();
+ }
+ else
+ {
+ fflush(file);
+ }
return 0;
}
@@ -331,8 +339,15 @@ int vflogf(FILE* file, const char* fmt, va_list args)
OutputDebugStringA(buffer);
}
- // We use fputs here so that this executes as fast a possible
- fputs(&buffer[0], file);
+ if (file == procstdout())
+ {
+ minipal_log_write_verbose(buffer);
+ }
+ else
+ {
+ fputs(&buffer[0], file);
+ }
+
return written;
}
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index e593b92c6b7da6..f6dda4d6a85764 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -3425,20 +3425,54 @@ void Compiler::fgFindBasicBlocks()
unsigned XTnum;
- /* Are there any exception handlers? */
-
+ // Are there any exception handlers?
+ //
if (info.compXcptnsCount > 0)
{
- noway_assert(!compIsForInlining());
+ assert(!compIsForInlining() || opts.compInlineMethodsWithEH);
- /* Check and mark all the exception handlers */
+ if (compIsForInlining())
+ {
+ // Verify we can expand the EH table as needed to incorporate the callee's EH clauses.
+ // Failing here should be extremely rare.
+ //
+ EHblkDsc* const dsc = fgTryAddEHTableEntries(0, info.compXcptnsCount, /* deferAdding */ true);
+        if (dsc == nullptr)
+        {
+            compInlineResult->NoteFatal(InlineObservation::CALLSITE_EH_TABLE_FULL);
+            return;
+        }
+ }
+ // Check and mark all the exception handlers
+ //
for (XTnum = 0; XTnum < info.compXcptnsCount; XTnum++)
{
CORINFO_EH_CLAUSE clause;
info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
noway_assert(clause.HandlerLength != (unsigned)-1);
+ // If we're inlining, and the inlinee has a catch clause, we are currently
+ // unable to convey the type of the catch properly, as it is represented
+ // by a token. So, abandon inlining.
+ //
+ // TODO: if inlining methods with catches is rare, consider
+ // transforming class catches into runtime filters like we do in
+ // fgCreateFiltersForGenericExceptions
+ //
+ if (compIsForInlining())
+ {
+ const bool isFinallyFaultOrFilter =
+ (clause.Flags & (CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_FAULT | CORINFO_EH_CLAUSE_FILTER)) !=
+ 0;
+
+ if (!isFinallyFaultOrFilter)
+ {
+ JITDUMP("Inlinee EH clause %u is a catch; we can't inline these (yet)\n", XTnum);
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH);
+ return;
+ }
+ }
+
if (clause.TryLength <= 0)
{
BADCODE("try block length <=0");
@@ -3577,8 +3611,6 @@ void Compiler::fgFindBasicBlocks()
lvaInlineeReturnSpillTempFreshlyCreated = true;
}
}
-
- return;
}
/* Mark all blocks within 'try' blocks as such */
@@ -3646,6 +3678,7 @@ void Compiler::fgFindBasicBlocks()
BADCODE3("end of hnd block beyond end of method for try", " at offset %04X", tryBegOff);
}
+ HBtab->ebdID = impInlineRoot()->compEHID++;
HBtab->ebdTryBegOffset = tryBegOff;
HBtab->ebdTryEndOffset = tryEndOff;
HBtab->ebdFilterBegOffset = filterBegOff;
diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp
index 6e0b726266bad6..4dc20dcd8d2511 100644
--- a/src/coreclr/jit/fgehopt.cpp
+++ b/src/coreclr/jit/fgehopt.cpp
@@ -690,7 +690,9 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
// Handler index of any nested blocks will update when we
// remove the EH table entry. Change handler exits to jump to
// the continuation. Clear catch type on handler entry.
- // Decrement nesting level of enclosed GT_END_LFINs.
+ //
+ // GT_END_LFIN no longer need updates here, now their gtVal1 fields refer to EH IDs.
+ //
for (BasicBlock* const block : Blocks(firstHandlerBlock, lastHandlerBlock))
{
if (block == firstHandlerBlock)
@@ -725,25 +727,6 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
}
}
}
-
-#if defined(FEATURE_EH_WINDOWS_X86)
- if (!UsesFunclets())
- {
- // If we're in a non-funclet model, decrement the nesting
- // level of any GT_END_LFIN we find in the handler region,
- // since we're removing the enclosing handler.
- for (Statement* const stmt : block->Statements())
- {
- GenTree* expr = stmt->GetRootNode();
- if (expr->gtOper == GT_END_LFIN)
- {
- const size_t nestLevel = expr->AsVal()->gtVal1;
- assert(nestLevel > 0);
- expr->AsVal()->gtVal1 = nestLevel - 1;
- }
- }
- }
-#endif // FEATURE_EH_WINDOWS_X86
}
// (6) Update any impacted ACDs.
@@ -2697,8 +2680,8 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
if (bbIsTryBeg(block))
{
assert(added);
- JITDUMP("==> found try entry for EH#%02u nested in handler at " FMT_BB "\n", block->bbNum,
- block->getTryIndex());
+ JITDUMP("==> found try entry for EH#%02u nested in handler at " FMT_BB "\n", block->getTryIndex(),
+ block->bbNum);
regionsToProcess.Push(block->getTryIndex());
}
}
@@ -2778,6 +2761,12 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
assert(insertBeforeIndex == enclosingTryIndex);
}
+ if (insertBeforeIndex != compHndBBtabCount)
+ {
+ JITDUMP("Existing EH region(s) EH#%02u...EH#%02u will become EH#%02u...EH#%02u\n", insertBeforeIndex,
+ compHndBBtabCount - 1, insertBeforeIndex + regionCount, compHndBBtabCount + regionCount - 1);
+ }
+
// Once we call fgTryAddEHTableEntries with deferCloning = false,
// all the EH indicies at or above insertBeforeIndex will shift,
// and the EH table may reallocate.
@@ -2870,12 +2859,14 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
compHndBBtab[XTnum] = compHndBBtab[originalXTnum];
EHblkDsc* const ebd = &compHndBBtab[XTnum];
+ ebd->ebdID = impInlineRoot()->compEHID++;
+
// Note the outermost region enclosing indices stay the same, because the original
// clause entries got adjusted when we inserted the new clauses.
//
if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
- if (XTnum < clonedOutermostRegionIndex)
+ if (ebd->ebdEnclosingTryIndex < clonedOutermostRegionIndex)
{
ebd->ebdEnclosingTryIndex += (unsigned short)indexShift;
}
@@ -2888,7 +2879,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
- if (XTnum < clonedOutermostRegionIndex)
+ if (ebd->ebdEnclosingHndIndex < clonedOutermostRegionIndex)
{
ebd->ebdEnclosingHndIndex += (unsigned short)indexShift;
}
@@ -3028,6 +3019,22 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
newBlock->bbRefs++;
}
}
+
+#if defined(FEATURE_EH_WINDOWS_X86)
+ // Update the EH ID for any cloned GT_END_LFIN.
+ //
+ for (Statement* const stmt : newBlock->Statements())
+ {
+ GenTree* const rootNode = stmt->GetRootNode();
+ if (rootNode->OperIs(GT_END_LFIN))
+ {
+ GenTreeVal* const endNode = rootNode->AsVal();
+ EHblkDsc* const oldEbd = ehFindEHblkDscById((unsigned short)endNode->gtVal1);
+ EHblkDsc* const newEbd = oldEbd + indexShift;
+ endNode->gtVal1 = newEbd->ebdID;
+ }
+ }
+#endif
}
JITDUMP("Done fixing region indices\n");
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp
index 3d1e34a5aa1a19..e6be97f1d38e74 100644
--- a/src/coreclr/jit/fginline.cpp
+++ b/src/coreclr/jit/fginline.cpp
@@ -1035,6 +1035,19 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result,
// Don't expect any surprises here.
assert(result->IsCandidate());
+#if defined(DEBUG)
+ // Fail if we're inlining and we've reached the acceptance limit.
+ //
+ int limit = JitConfig.JitInlineLimit();
+ unsigned current = m_inlineStrategy->GetInlineCount();
+
+    if ((limit >= 0) && (current >= static_cast<unsigned>(limit)))
+ {
+ result->NoteFatal(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
+ return;
+ }
+#endif // defined(DEBUG)
+
if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
{
// For now, attributing this to call site, though it's really
@@ -1165,7 +1178,7 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call)
return;
}
- InlineResult inlineResult(this, call, nullptr, "fgNoteNonInlineCandidate", false);
+ InlineResult inlineResult(this, call, nullptr, "fgNoteNonInlineCandidate", true);
InlineObservation currentObservation = InlineObservation::CALLSITE_NOT_CANDIDATE;
// Try and recover the reason left behind when the jit decided
@@ -1568,13 +1581,179 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
//
bottomBlock->RemoveFlags(BBF_DONT_REMOVE);
+ // If the inlinee has EH, merge the EH tables, and figure out how much of
+    // a shift we need to make in the inlinee blocks' EH indices.
+ //
+ unsigned const inlineeRegionCount = InlineeCompiler->compHndBBtabCount;
+ const bool inlineeHasEH = inlineeRegionCount > 0;
+ unsigned inlineeIndexShift = 0;
+
+ if (inlineeHasEH)
+ {
+ // If the call site also has EH, we need to insert the inlinee clauses
+ // so they are a child of the call site's innermost enclosing region.
+ // Figure out what this is.
+ //
+ bool inTryRegion = false;
+ unsigned const enclosingRegion = ehGetMostNestedRegionIndex(iciBlock, &inTryRegion);
+
+ // We will insert the inlinee clauses in bulk before this index.
+ //
+ unsigned insertBeforeIndex = 0;
+
+ if (enclosingRegion == 0)
+ {
+ // The call site is not in an EH region, so we can put the inlinee EH clauses
+            // at the end of the root method's EH table.
+ //
+ // For example, if the root method already has EH#0, and the inlinee has 2 regions
+ //
+ // enclosingRegion will be 0
+ // inlineeIndexShift will be 1
+ // insertBeforeIndex will be 1
+ //
+ // inlinee eh0 -> eh1
+ // inlinee eh1 -> eh2
+ //
+ // root eh0 -> eh0
+ //
+ inlineeIndexShift = compHndBBtabCount;
+ insertBeforeIndex = compHndBBtabCount;
+ }
+ else
+ {
+ // The call site is in an EH region, so we can put the inlinee EH clauses
+ // just before the enclosing region
+ //
+ // Note enclosingRegion is region index + 1. So EH#0 will be represented by 1 here.
+ //
+ // For example, if the enclosing EH regions are try#2 and hnd#3, and the inlinee has 2 eh clauses
+ //
+ // enclosingRegion will be 3 (try2 + 1)
+ // inlineeIndexShift will be 2
+ // insertBeforeIndex will be 2
+ //
+ // inlinee eh0 -> eh2
+ // inlinee eh1 -> eh3
+ //
+ // root eh0 -> eh0
+ // root eh1 -> eh1
+ //
+ // root eh2 -> eh4
+ // root eh3 -> eh5
+ //
+ inlineeIndexShift = enclosingRegion - 1;
+ insertBeforeIndex = enclosingRegion - 1;
+ }
+
+        JITDUMP(
+            "Inlinee has EH. In root method, inlinee's %u EH region indices will shift by %u and become EH#%02u ... EH#%02u\n",
+            inlineeRegionCount, inlineeIndexShift, insertBeforeIndex, insertBeforeIndex + inlineeRegionCount - 1);
+
+ if (enclosingRegion != 0)
+ {
+ JITDUMP("Inlinee is nested within current %s EH#%02u (which will become EH#%02u)\n",
+ inTryRegion ? "try" : "hnd", enclosingRegion - 1, enclosingRegion - 1 + inlineeRegionCount);
+ }
+ else
+ {
+ JITDUMP("Inlinee is not nested inside any EH region\n");
+ }
+
+ // Grow the EH table.
+ //
+ // TODO: verify earlier that this won't fail...
+ //
+ EHblkDsc* const outermostEbd =
+ fgTryAddEHTableEntries(insertBeforeIndex, inlineeRegionCount, /* deferAdding */ false);
+ assert(outermostEbd != nullptr);
+
+ // fgTryAddEHTableEntries has adjusted the indices of all root method blocks and EH clauses
+ // to accommodate the new entries. No other changes to those are needed.
+ //
+ // We just need to add in and fix up the new entries from the inlinee.
+ //
+        // Fetch the new enclosing try/handler table indices.
+ //
+ const unsigned enclosingTryIndex =
+ iciBlock->hasTryIndex() ? iciBlock->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX;
+ const unsigned enclosingHndIndex =
+ iciBlock->hasHndIndex() ? iciBlock->getHndIndex() : EHblkDsc::NO_ENCLOSING_INDEX;
+
+        // Copy over the EH table entries from inlinee->root and adjust their enclosing indices.
+ //
+ for (unsigned XTnum = 0; XTnum < inlineeRegionCount; XTnum++)
+ {
+ unsigned newXTnum = XTnum + inlineeIndexShift;
+ compHndBBtab[newXTnum] = InlineeCompiler->compHndBBtab[XTnum];
+ EHblkDsc* const ebd = &compHndBBtab[newXTnum];
+
+ if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ ebd->ebdEnclosingTryIndex += (unsigned short)inlineeIndexShift;
+ }
+ else
+ {
+ ebd->ebdEnclosingTryIndex = (unsigned short)enclosingTryIndex;
+ }
+
+ if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ ebd->ebdEnclosingHndIndex += (unsigned short)inlineeIndexShift;
+ }
+ else
+ {
+ ebd->ebdEnclosingHndIndex = (unsigned short)enclosingHndIndex;
+ }
+ }
+ }
+
+    // Fetch the new enclosing try/handler indices for blocks.
+ // Note these are represented differently than the EH table indices.
+ //
+ const unsigned blockEnclosingTryIndex = iciBlock->hasTryIndex() ? iciBlock->getTryIndex() + 1 : 0;
+ const unsigned blockEnclosingHndIndex = iciBlock->hasHndIndex() ? iciBlock->getHndIndex() + 1 : 0;
+
// Set the try and handler index and fix the jump types of inlinee's blocks.
//
for (BasicBlock* const block : InlineeCompiler->Blocks())
{
- noway_assert(!block->hasTryIndex());
- noway_assert(!block->hasHndIndex());
- block->copyEHRegion(iciBlock);
+ if (block->hasTryIndex())
+ {
+ JITDUMP("Inlinee " FMT_BB " has old try index %u, shift %u, new try index %u\n", block->bbNum,
+ (unsigned)block->bbTryIndex, inlineeIndexShift,
+ (unsigned)(block->bbTryIndex + inlineeIndexShift));
+ block->bbTryIndex += (unsigned short)inlineeIndexShift;
+ }
+ else
+ {
+ block->bbTryIndex = (unsigned short)blockEnclosingTryIndex;
+ }
+
+ if (block->hasHndIndex())
+ {
+ block->bbHndIndex += (unsigned short)inlineeIndexShift;
+ }
+ else
+ {
+ block->bbHndIndex = (unsigned short)blockEnclosingHndIndex;
+ }
+
+ // Sanity checks
+ //
+ if (iciBlock->hasTryIndex())
+ {
+ assert(block->hasTryIndex());
+ assert(block->getTryIndex() <= iciBlock->getTryIndex());
+ }
+
+ if (iciBlock->hasHndIndex())
+ {
+ assert(block->hasHndIndex());
+ assert(block->getHndIndex() <= iciBlock->getHndIndex());
+ }
+
block->CopyFlags(iciBlock, BBF_BACKWARD_JUMP | BBF_PROF_WEIGHT);
// Update block nums appropriately
@@ -1763,9 +1942,6 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
// If the call site is not in a try and the callee has a throw,
// we may introduce inconsistency.
//
- // Technically we should check if the callee has a throw not in a try, but since
- // we can't inline methods with EH yet we don't see those.
- //
if (InlineeCompiler->fgThrowCount > 0)
{
JITDUMP("INLINER: may-throw inlinee\n");
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index ca8b79bcbf9ae9..35ed6ab2a6b0f0 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -2519,17 +2519,13 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block)
//
bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
{
- if (opts.MinOpts())
- {
- return false;
- }
+ assert(opts.OptimizationEnabled());
if (!bJump->KindIs(BBJ_ALWAYS))
{
return false;
}
- // We might be able to compact blocks that always jump to the next block.
if (bJump->JumpsToNext())
{
return false;
@@ -2540,7 +2536,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
return false;
}
- BasicBlock* bDest = bJump->GetTarget();
+ BasicBlock* const bDest = bJump->GetTarget();
if (!bDest->KindIs(BBJ_COND))
{
@@ -2559,17 +2555,13 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
return false;
}
- // do not jump into another try region
- BasicBlock* bDestNormalTarget = bDest->GetFalseTarget();
- if (bDestNormalTarget->hasTryIndex() && !BasicBlock::sameTryRegion(bJump, bDestNormalTarget))
- {
- return false;
- }
+ // We should have already compacted 'bDest' into 'bJump', if it is possible.
+ assert(!fgCanCompactBlock(bJump));
- // This function is only called by fgReorderBlocks, which we do not run in the backend.
- // If we wanted to run block reordering in the backend, we would need to be able to
- // calculate cost information for LIR on a per-node basis in order for this function
- // to work.
+ BasicBlock* const trueTarget = bDest->GetTrueTarget();
+ BasicBlock* const falseTarget = bDest->GetFalseTarget();
+
+ // This function is only called in the frontend.
assert(!bJump->IsLIR());
assert(!bDest->IsLIR());
@@ -2593,10 +2585,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
bool haveProfileWeights = false;
weight_t weightJump = bJump->bbWeight;
weight_t weightDest = bDest->bbWeight;
- weight_t weightNext = bJump->Next()->bbWeight;
+ weight_t weightNext = trueTarget->bbWeight;
bool rareJump = bJump->isRunRarely();
bool rareDest = bDest->isRunRarely();
- bool rareNext = bJump->Next()->isRunRarely();
+ bool rareNext = trueTarget->isRunRarely();
// If we have profile data then we calculate the number of time
// the loop will iterate into loopIterations
@@ -2607,7 +2599,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
//
if (bJump->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY) &&
bDest->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY) &&
- bJump->Next()->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY))
+ trueTarget->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY))
{
haveProfileWeights = true;
@@ -2721,7 +2713,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
noway_assert(condTree->gtOper == GT_JTRUE);
// Set condTree to the operand to the GT_JTRUE.
- condTree = condTree->AsOp()->gtOp1;
+ condTree = condTree->gtGetOp1();
// This condTree has to be a RelOp comparison.
if (condTree->OperIsCompare() == false)
@@ -2773,12 +2765,11 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
// because the comparison in 'bJump' is flipped.
// Similarly, we will derive the true edge's likelihood from 'destFalseEdge'.
//
- BasicBlock* const bDestFalseTarget = bJump->Next();
- FlowEdge* const falseEdge = fgAddRefPred(bDestFalseTarget, bJump, destTrueEdge);
+ FlowEdge* const falseEdge = fgAddRefPred(trueTarget, bJump, destTrueEdge);
// bJump now jumps to bDest's normal jump target
//
- fgRedirectTargetEdge(bJump, bDestNormalTarget);
+ fgRedirectTargetEdge(bJump, falseTarget);
bJump->GetTargetEdge()->setLikelihood(destFalseEdge->getLikelihood());
bJump->SetCond(bJump->GetTargetEdge(), falseEdge);
@@ -2793,10 +2784,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
// Propagate bJump's weight into its new successors
//
- bDestNormalTarget->setBBProfileWeight(bDestNormalTarget->computeIncomingWeight());
- bDestFalseTarget->setBBProfileWeight(bDestFalseTarget->computeIncomingWeight());
+ trueTarget->setBBProfileWeight(trueTarget->computeIncomingWeight());
+ falseTarget->setBBProfileWeight(falseTarget->computeIncomingWeight());
- if ((bDestNormalTarget->NumSucc() > 0) || (bDestFalseTarget->NumSucc() > 0))
+ if ((trueTarget->NumSucc() > 0) || (falseTarget->NumSucc() > 0))
{
JITDUMP("fgOptimizeBranch: New flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
bJump->bbNum, fgPgoConsistent ? "is now" : "was already");
@@ -2821,6 +2812,14 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
}
#endif // DEBUG
+ // Removing flow from 'bJump' into 'bDest' may have made it possible to compact the latter.
+ BasicBlock* const uniquePred = bDest->GetUniquePred(this);
+ if ((uniquePred != nullptr) && fgCanCompactBlock(uniquePred))
+ {
+ JITDUMP(FMT_BB " can now be compacted into its remaining predecessor.\n", bDest->bbNum);
+ fgCompactBlock(uniquePred);
+ }
+
return true;
}
@@ -3244,1163 +3243,6 @@ bool Compiler::fgExpandRarelyRunBlocks()
return result;
}
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-
-//-----------------------------------------------------------------------------
-// fgReorderBlocks: reorder blocks to favor frequent fall through paths
-// and move rare blocks to the end of the method/eh region.
-//
-// Arguments:
-// useProfile - if true, use profile data (if available) to more aggressively
-// reorder the blocks.
-//
-// Returns:
-// True if anything got reordered. Reordering blocks may require changing
-// IR to reverse branch conditions.
-//
-// Notes:
-// We currently allow profile-driven switch opts even when useProfile is false,
-// as they are unlikely to lead to reordering..
-//
-bool Compiler::fgReorderBlocks(bool useProfile)
-{
- noway_assert(opts.compDbgCode == false);
-
- // We can't relocate anything if we only have one block
- if (fgFirstBB->IsLast())
- {
- return false;
- }
-
- bool newRarelyRun = false;
- bool movedBlocks = false;
- bool optimizedSwitches = false;
- bool optimizedBranches = false;
-
- // First let us expand the set of run rarely blocks
- newRarelyRun |= fgExpandRarelyRunBlocks();
-
- if (useProfile)
- {
- // Don't run the new layout until we get to the backend,
- // since LSRA can introduce new blocks, and lowering can churn the flowgraph.
- //
- if (JitConfig.JitDoReversePostOrderLayout())
- {
- return (newRarelyRun || movedBlocks || optimizedSwitches);
- }
-
- // We will be reordering blocks, so ensure the false target of a BBJ_COND block is its next block
- for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
- {
- if (block->KindIs(BBJ_COND) && !block->NextIs(block->GetFalseTarget()))
- {
- if (block->CanRemoveJumpToTarget(block->GetTrueTarget(), this))
- {
- // Reverse the jump condition
- GenTree* test = block->lastNode();
- assert(test->OperIsConditionalJump());
- test->AsOp()->gtOp1 = gtReverseCond(test->AsOp()->gtOp1);
-
- FlowEdge* const newFalseEdge = block->GetTrueEdge();
- FlowEdge* const newTrueEdge = block->GetFalseEdge();
- block->SetTrueEdge(newTrueEdge);
- block->SetFalseEdge(newFalseEdge);
- assert(block->CanRemoveJumpToTarget(block->GetFalseTarget(), this));
- }
- else
- {
- BasicBlock* jmpBlk = fgConnectFallThrough(block, block->GetFalseTarget());
- assert(jmpBlk != nullptr);
- assert(block->NextIs(jmpBlk));
-
- // Skip next block
- block = jmpBlk;
- }
- }
- }
- }
-
-#ifdef DEBUG
- if (verbose)
- {
- printf("*************** In fgReorderBlocks()\n");
-
- printf("\nInitial BasicBlocks");
- fgDispBasicBlocks(verboseTrees);
- printf("\n");
- }
-#endif // DEBUG
-
- BasicBlock* bNext;
- BasicBlock* bPrev;
- BasicBlock* block;
- unsigned XTnum;
- EHblkDsc* HBtab;
-
- // Iterate over every block, remembering our previous block in bPrev
- for (bPrev = fgFirstBB, block = bPrev->Next(); block != nullptr; bPrev = block, block = block->Next())
- {
- //
- // Consider relocating the rarely run blocks such that they are at the end of the method.
- // We also consider reversing conditional branches so that they become a not taken forwards branch.
- //
-
- // Don't consider BBJ_CALLFINALLYRET; it should be processed together with BBJ_CALLFINALLY.
- if (block->KindIs(BBJ_CALLFINALLYRET))
- {
- continue;
- }
-
- // If block is marked with a BBF_KEEP_BBJ_ALWAYS flag then we don't move the block
- if (block->HasFlag(BBF_KEEP_BBJ_ALWAYS))
- {
- continue;
- }
-
- // Finally and handlers blocks are to be kept contiguous.
- // TODO-CQ: Allow reordering within the handler region
- if (block->hasHndIndex())
- {
- continue;
- }
-
- bool reorderBlock = useProfile;
- const bool isRare = block->isRunRarely();
- BasicBlock* bDest = nullptr;
- bool forwardBranch = false;
- bool backwardBranch = false;
-
- // Setup bDest
- if (bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET))
- {
- bDest = bPrev->GetTarget();
- forwardBranch = fgIsForwardBranch(bPrev, bDest);
- backwardBranch = !forwardBranch;
- }
- else if (bPrev->KindIs(BBJ_COND))
- {
- // fgReorderBlocks is called in more than one optimization phase,
- // but only does any reordering in optOptimizeLayout.
- // At that point, we expect implicit fallthrough to be restored for BBJ_COND blocks.
- assert(bPrev->FalseTargetIs(block) || !reorderBlock);
- bDest = bPrev->GetTrueTarget();
- forwardBranch = fgIsForwardBranch(bPrev, bDest);
- backwardBranch = !forwardBranch;
- }
-
- // We will look for bPrev as a non rarely run block followed by block as a rarely run block
- //
- if (bPrev->isRunRarely())
- {
- reorderBlock = false;
- }
-
- // If the weights of the bPrev, block and bDest were all obtained from a profile run
- // then we can use them to decide if it is useful to reverse this conditional branch
-
- weight_t profHotWeight = -1;
-
- if (useProfile && bPrev->hasProfileWeight() && block->hasProfileWeight() &&
- ((bDest == nullptr) || bDest->hasProfileWeight()))
- {
- //
- // All blocks have profile information
- //
- if (forwardBranch)
- {
- if (bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET))
- {
- if (bPrev->JumpsToNext())
- {
- bDest = nullptr;
- goto CHECK_FOR_RARE;
- }
- // We can pull up the blocks that the unconditional jump branches to
- // if the weight of bDest is greater or equal to the weight of block
- // also the weight of bDest can't be zero.
- // Don't reorder if bPrev's jump destination is the next block.
- //
- else if ((bDest->bbWeight < block->bbWeight) || (bDest->bbWeight == BB_ZERO_WEIGHT))
- {
- reorderBlock = false;
- }
- else
- {
- //
- // If this remains true then we will try to pull up bDest to succeed bPrev
- //
- bool moveDestUp = true;
-
- //
- // The edge bPrev -> bDest must have a higher weight
- // than every other edge into bDest
- //
- weight_t const weightToBeat = bPrev->GetTargetEdge()->getLikelyWeight();
-
- // Examine all of the other edges into bDest
- for (FlowEdge* const edge : bDest->PredEdges())
- {
- if (edge->getLikelyWeight() > weightToBeat)
- {
- moveDestUp = false;
- break;
- }
- }
-
- // Are we still good to move bDest up to bPrev?
- if (moveDestUp)
- {
- //
- // We will consider all blocks that have less weight than profHotWeight to be
- // uncommonly run blocks as compared with the hot path of bPrev taken-jump to bDest
- //
- profHotWeight = bDest->bbWeight - 1;
- }
- else
- {
- if (block->isRunRarely())
- {
- // We will move any rarely run blocks blocks
- profHotWeight = 0;
- }
- else
- {
- // We will move all blocks that have a weight less or equal to our fall through block
- profHotWeight = block->bbWeight + 1;
- }
- // But we won't try to connect with bDest
- bDest = nullptr;
- }
- }
- }
- else // (bPrev->KindIs(BBJ_COND))
- {
- noway_assert(bPrev->KindIs(BBJ_COND));
- //
- // We will reverse branch if the true edge's likelihood is more than 51%.
- //
- // We will set up profHotWeight to be maximum bbWeight that a block
- // could have for us not to want to reverse the conditional branch.
- //
- // We will consider all blocks that have less weight than profHotWeight to be
- // uncommonly run blocks compared to the weight of bPrev's true edge.
- //
- // We will check if bPrev's true edge weight
- // is more than twice bPrev's false edge weight.
- //
- // bPrev --> [BB04, weight 100]
- // | \.
- // falseEdge ---------------> O \.
- // [likelihood=0.33] V \.
- // block --> [BB05, weight 33] \.
- // \.
- // trueEdge ------------------------------> O
- // [likelihood=0.67] |
- // V
- // bDest ---------------> [BB08, weight 67]
- //
- assert(bPrev->FalseTargetIs(block));
- FlowEdge* trueEdge = bPrev->GetTrueEdge();
- FlowEdge* falseEdge = bPrev->GetFalseEdge();
- noway_assert(trueEdge != nullptr);
- noway_assert(falseEdge != nullptr);
-
- // If we take the true branch more than half the time, we will reverse the branch.
- if (trueEdge->getLikelihood() < 0.51)
- {
- reorderBlock = false;
- }
- else
- {
- // set profHotWeight
- profHotWeight = falseEdge->getLikelyWeight() - 1;
- }
- }
- }
- else // not a forwardBranch
- {
- if (bPrev->bbFallsThrough())
- {
- goto CHECK_FOR_RARE;
- }
-
- // Here we should pull up the highest weight block remaining
- // and place it here since bPrev does not fall through.
-
- weight_t highestWeight = 0;
- BasicBlock* candidateBlock = nullptr;
- BasicBlock* lastNonFallThroughBlock = bPrev;
- BasicBlock* bTmp = bPrev->Next();
-
- while (bTmp != nullptr)
- {
- // Don't try to split a call finally pair
- //
- if (bTmp->isBBCallFinallyPair())
- {
- // Move bTmp forward
- bTmp = bTmp->Next();
- }
-
- //
- // Check for loop exit condition
- //
- if (bTmp == nullptr)
- {
- break;
- }
-
- //
- // if its weight is the highest one we've seen and
- // the EH regions allow for us to place bTmp after bPrev
- //
- if ((bTmp->bbWeight > highestWeight) && fgEhAllowsMoveBlock(bPrev, bTmp))
- {
- // When we have a current candidateBlock that is a conditional (or unconditional) jump
- // to bTmp (which is a higher weighted block) then it is better to keep our current
- // candidateBlock and have it fall into bTmp
- //
- if ((candidateBlock == nullptr) || !candidateBlock->KindIs(BBJ_COND, BBJ_ALWAYS) ||
- (candidateBlock->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) &&
- (!candidateBlock->TargetIs(bTmp) || candidateBlock->JumpsToNext())) ||
- (candidateBlock->KindIs(BBJ_COND) && !candidateBlock->TrueTargetIs(bTmp)))
- {
- // otherwise we have a new candidateBlock
- //
- highestWeight = bTmp->bbWeight;
- candidateBlock = lastNonFallThroughBlock->Next();
- }
- }
-
- const bool bTmpJumpsToNext = bTmp->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && bTmp->JumpsToNext();
- if ((!bTmp->bbFallsThrough() && !bTmpJumpsToNext) || (bTmp->bbWeight == BB_ZERO_WEIGHT))
- {
- lastNonFallThroughBlock = bTmp;
- }
-
- bTmp = bTmp->Next();
- }
-
- // If we didn't find a suitable block then skip this
- if (highestWeight == 0)
- {
- reorderBlock = false;
- }
- else
- {
- noway_assert(candidateBlock != nullptr);
-
- // If the candidateBlock is the same a block then skip this
- if (candidateBlock == block)
- {
- reorderBlock = false;
- }
- else
- {
- // Set bDest to the block that we want to come after bPrev
- bDest = candidateBlock;
-
- // set profHotWeight
- profHotWeight = highestWeight - 1;
- }
- }
- }
- }
- else // we don't have good profile info (or we are falling through)
- {
-
- CHECK_FOR_RARE:;
-
- /* We only want to reorder when we have a rarely run */
- /* block right after a normal block, */
- /* (bPrev is known to be a normal block at this point) */
- if (!isRare)
- {
- if (block->NextIs(bDest) && block->KindIs(BBJ_RETURN) && bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET))
- {
- // This is a common case with expressions like "return Expr1 && Expr2" -- move the return
- // to establish fall-through.
- }
- else
- {
- reorderBlock = false;
- }
- }
- else
- {
- /* If the jump target bDest is also a rarely run block then we don't want to do the reversal */
- if (bDest && bDest->isRunRarely())
- {
- reorderBlock = false; /* Both block and bDest are rarely run */
- }
- else
- {
- // We will move any rarely run blocks blocks
- profHotWeight = 0;
- }
- }
- }
-
- if (reorderBlock == false)
- {
- //
- // Check for an unconditional branch to a conditional branch
- // which also branches back to our next block
- //
- const bool optimizedBranch = fgOptimizeBranch(bPrev);
- if (optimizedBranch)
- {
- noway_assert(bPrev->KindIs(BBJ_COND));
- optimizedBranches = true;
- }
- continue;
- }
-
- // Now we need to determine which blocks should be moved
- //
- // We consider one of two choices:
- //
- // 1. Moving the fall-through blocks (or rarely run blocks) down to
- // later in the method and hopefully connecting the jump dest block
- // so that it becomes the fall through block
- //
- // And when bDest is not NULL, we also consider:
- //
- // 2. Moving the bDest block (or blocks) up to bPrev
- // so that it could be used as a fall through block
- //
- // We will prefer option #1 if we are able to connect the jump dest
- // block as the fall though block otherwise will we try to use option #2
- //
-
- //
- // Consider option #1: relocating blocks starting at 'block'
- // to later in flowgraph
- //
- // We set bStart to the first block that will be relocated
- // and bEnd to the last block that will be relocated
-
- BasicBlock* bStart = block;
- BasicBlock* bEnd = bStart;
- bNext = bEnd->Next();
- bool connected_bDest = false;
-
- if ((backwardBranch && !isRare) || block->HasFlag(BBF_DONT_REMOVE)) // Don't choose option #1 when block is the
- // start of a try region
- {
- bStart = nullptr;
- bEnd = nullptr;
- }
- else
- {
- while (true)
- {
- // Don't try to split a call finally pair
- //
- if (bEnd->isBBCallFinallyPair())
- {
- // Move bEnd and bNext forward
- bEnd = bNext;
- bNext = bNext->Next();
- }
-
- //
- // Check for loop exit condition
- //
- if (bNext == nullptr)
- {
- break;
- }
-
- // Check if we've reached the funclets region, at the end of the function
- if (bEnd->NextIs(fgFirstFuncletBB))
- {
- break;
- }
-
- if (bNext == bDest)
- {
- connected_bDest = true;
- break;
- }
-
- // All the blocks must have the same try index
- // and must not have the BBF_DONT_REMOVE flag set
-
- if (!BasicBlock::sameTryRegion(bStart, bNext) || bNext->HasFlag(BBF_DONT_REMOVE))
- {
- // exit the loop, bEnd is now set to the
- // last block that we want to relocate
- break;
- }
-
- // If we are relocating rarely run blocks..
- if (isRare)
- {
- // ... then all blocks must be rarely run
- if (!bNext->isRunRarely())
- {
- // exit the loop, bEnd is now set to the
- // last block that we want to relocate
- break;
- }
- }
- else
- {
- // If we are moving blocks that are hot then all
- // of the blocks moved must be less than profHotWeight */
- if (bNext->bbWeight >= profHotWeight)
- {
- // exit the loop, bEnd is now set to the
- // last block that we would relocate
- break;
- }
- }
-
- // Move bEnd and bNext forward
- bEnd = bNext;
- bNext = bNext->Next();
- }
-
- // Set connected_bDest to true if moving blocks [bStart .. bEnd]
- // connects with the jump dest of bPrev (i.e bDest) and
- // thus allows bPrev fall through instead of jump.
- if (bNext == bDest)
- {
- connected_bDest = true;
- }
- }
-
- // Now consider option #2: Moving the jump dest block (or blocks)
- // up to bPrev
- //
- // The variables bStart2, bEnd2 and bPrev2 are used for option #2
- //
- // We will setup bStart2 to the first block that will be relocated
- // and bEnd2 to the last block that will be relocated
- // and bPrev2 to be the lexical pred of bDest
- //
- // If after this calculation bStart2 is NULL we cannot use option #2,
- // otherwise bStart2, bEnd2 and bPrev2 are all non-NULL and we will use option #2
-
- BasicBlock* bStart2 = nullptr;
- BasicBlock* bEnd2 = nullptr;
- BasicBlock* bPrev2 = nullptr;
-
- // If option #1 didn't connect bDest and bDest isn't NULL
- if ((connected_bDest == false) && (bDest != nullptr) &&
- // The jump target cannot be moved if it has the BBF_DONT_REMOVE flag set
- !bDest->HasFlag(BBF_DONT_REMOVE))
- {
- // We will consider option #2: relocating blocks starting at 'bDest' to succeed bPrev
- //
- // setup bPrev2 to be the lexical pred of bDest
-
- bPrev2 = block;
- while (bPrev2 != nullptr)
- {
- if (bPrev2->NextIs(bDest))
- {
- break;
- }
-
- bPrev2 = bPrev2->Next();
- }
-
- if ((bPrev2 != nullptr) && fgEhAllowsMoveBlock(bPrev, bDest))
- {
- // We have decided that relocating bDest to be after bPrev is best
- // Set bStart2 to the first block that will be relocated
- // and bEnd2 to the last block that will be relocated
- //
- // Assigning to bStart2 selects option #2
- //
- bStart2 = bDest;
- bEnd2 = bStart2;
- bNext = bEnd2->Next();
-
- while (true)
- {
- // Don't try to split a call finally pair
- //
- if (bEnd2->isBBCallFinallyPair())
- {
- noway_assert(bNext->KindIs(BBJ_CALLFINALLYRET));
- // Move bEnd2 and bNext forward
- bEnd2 = bNext;
- bNext = bNext->Next();
- }
-
- // Check for the Loop exit conditions
-
- if (bNext == nullptr)
- {
- break;
- }
-
- if (bEnd2->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && bEnd2->JumpsToNext())
- {
- // Treat jumps to next block as fall-through
- }
- else if (bEnd2->bbFallsThrough() == false)
- {
- break;
- }
-
- // If we are relocating rarely run blocks..
- // All the blocks must have the same try index,
- // and must not have the BBF_DONT_REMOVE flag set
-
- if (!BasicBlock::sameTryRegion(bStart2, bNext) || bNext->HasFlag(BBF_DONT_REMOVE))
- {
- // exit the loop, bEnd2 is now set to the
- // last block that we want to relocate
- break;
- }
-
- if (isRare)
- {
- /* ... then all blocks must not be rarely run */
- if (bNext->isRunRarely())
- {
- // exit the loop, bEnd2 is now set to the
- // last block that we want to relocate
- break;
- }
- }
- else
- {
- // If we are relocating hot blocks
- // all blocks moved must be greater than profHotWeight
- if (bNext->bbWeight <= profHotWeight)
- {
- // exit the loop, bEnd2 is now set to the
- // last block that we want to relocate
- break;
- }
- }
-
- // Move bEnd2 and bNext forward
- bEnd2 = bNext;
- bNext = bNext->Next();
- }
- }
- }
-
- // If we are using option #1 then ...
- if (bStart2 == nullptr)
- {
- // Don't use option #1 for a backwards branch
- if (bStart == nullptr)
- {
- continue;
- }
-
- // .... Don't move a set of blocks that are already at the end of the main method
- if (bEnd == fgLastBBInMainFunction())
- {
- continue;
- }
- }
-
-#ifdef DEBUG
- if (verbose)
- {
- if (bDest != nullptr)
- {
- if (bPrev->KindIs(BBJ_COND))
- {
- printf("Decided to reverse conditional branch at block " FMT_BB " branch to " FMT_BB " ",
- bPrev->bbNum, bDest->bbNum);
- }
- else if (bPrev->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET))
- {
- printf("Decided to straighten unconditional branch at block " FMT_BB " branch to " FMT_BB " ",
- bPrev->bbNum, bDest->bbNum);
- }
- else
- {
- printf("Decided to place hot code after " FMT_BB ", placed " FMT_BB " after this block ",
- bPrev->bbNum, bDest->bbNum);
- }
-
- if (profHotWeight > 0)
- {
- printf("because of IBC profile data\n");
- }
- else
- {
- if (bPrev->bbFallsThrough())
- {
- printf("since it falls into a rarely run block\n");
- }
- else
- {
- printf("since it is succeeded by a rarely run block\n");
- }
- }
- }
- else
- {
- printf("Decided to relocate block(s) after block " FMT_BB " since they are %s block(s)\n", bPrev->bbNum,
- block->isRunRarely() ? "rarely run" : "uncommonly run");
- }
- }
-#endif // DEBUG
-
- // We will set insertAfterBlk to the block the precedes our insertion range
- // We will set bStartPrev to be the block that precedes the set of blocks that we are moving
- BasicBlock* insertAfterBlk;
- BasicBlock* bStartPrev;
-
- if (bStart2 != nullptr)
- {
- // Option #2: relocating blocks starting at 'bDest' to follow bPrev
-
- // Update bStart and bEnd so that we can use these two for all later operations
- bStart = bStart2;
- bEnd = bEnd2;
-
- // Set bStartPrev to be the block that comes before bStart
- bStartPrev = bPrev2;
-
- // We will move [bStart..bEnd] to immediately after bPrev
- insertAfterBlk = bPrev;
- }
- else
- {
- // option #1: Moving the fall-through blocks (or rarely run blocks) down to later in the method
-
- // Set bStartPrev to be the block that come before bStart
- bStartPrev = bPrev;
-
- // We will move [bStart..bEnd] but we will pick the insert location later
- insertAfterBlk = nullptr;
- }
-
- // We are going to move [bStart..bEnd] so they can't be NULL
- noway_assert(bStart != nullptr);
- noway_assert(bEnd != nullptr);
-
- // bEnd can't be a BBJ_CALLFINALLY unless it is a RETLESS call
- noway_assert(!bEnd->KindIs(BBJ_CALLFINALLY) || bEnd->HasFlag(BBF_RETLESS_CALL));
-
- // bStartPrev must be set to the block that precedes bStart
- noway_assert(bStartPrev->NextIs(bStart));
-
- // Since we will be unlinking [bStart..bEnd],
- // we need to compute and remember if bStart is in each of
- // the try and handler regions
- //
- bool* fStartIsInTry = nullptr;
- bool* fStartIsInHnd = nullptr;
-
- if (compHndBBtabCount > 0)
- {
- fStartIsInTry = new (this, CMK_Generic) bool[compHndBBtabCount];
- fStartIsInHnd = new (this, CMK_Generic) bool[compHndBBtabCount];
-
- for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
- {
- fStartIsInTry[XTnum] = HBtab->InTryRegionBBRange(bStart);
- fStartIsInHnd[XTnum] = HBtab->InHndRegionBBRange(bStart);
- }
- }
-
- /* Temporarily unlink [bStart..bEnd] from the flow graph */
- const bool bStartPrevJumpsToNext = bStartPrev->KindIs(BBJ_ALWAYS) && bStartPrev->JumpsToNext();
- fgUnlinkRange(bStart, bEnd);
-
- if (insertAfterBlk == nullptr)
- {
- // Find new location for the unlinked block(s)
- // Set insertAfterBlk to the block which will precede the insertion point
-
- if (!bStart->hasTryIndex() && isRare)
- {
- // We'll just insert the blocks at the end of the method. If the method
- // has funclets, we will insert at the end of the main method but before
- // any of the funclets. Note that we create funclets before we call
- // fgReorderBlocks().
-
- insertAfterBlk = fgLastBBInMainFunction();
- noway_assert(insertAfterBlk != bPrev);
- }
- else
- {
- BasicBlock* startBlk;
- BasicBlock* lastBlk;
- EHblkDsc* ehDsc = ehInitTryBlockRange(bStart, &startBlk, &lastBlk);
-
- BasicBlock* endBlk;
-
- /* Setup startBlk and endBlk as the range to search */
-
- if (ehDsc != nullptr)
- {
- endBlk = lastBlk->Next();
-
- /*
- Multiple (nested) try regions might start from the same BB.
- For example,
-
- try3 try2 try1
- |--- |--- |--- BB01
- | | | BB02
- | | |--- BB03
- | | BB04
- | |------------ BB05
- | BB06
- |------------------- BB07
-
- Now if we want to insert in try2 region, we will start with startBlk=BB01.
- The following loop will allow us to start from startBlk==BB04.
- */
- while (!BasicBlock::sameTryRegion(startBlk, bStart) && (startBlk != endBlk))
- {
- startBlk = startBlk->Next();
- }
-
- // startBlk cannot equal endBlk as it must come before endBlk
- if (startBlk == endBlk)
- {
- goto CANNOT_MOVE;
- }
-
- // we also can't start searching the try region at bStart
- if (startBlk == bStart)
- {
- // if bEnd is the last block in the method or
- // or if bEnd->bbNext is in a different try region
- // then we cannot move the blocks
- //
- if (bEnd->IsLast() || !BasicBlock::sameTryRegion(startBlk, bEnd->Next()))
- {
- goto CANNOT_MOVE;
- }
-
- startBlk = bEnd->Next();
-
- // Check that the new startBlk still comes before endBlk
-
- // startBlk cannot equal endBlk as it must come before endBlk
- if (startBlk == endBlk)
- {
- goto CANNOT_MOVE;
- }
-
- BasicBlock* tmpBlk = startBlk;
- while ((tmpBlk != endBlk) && (tmpBlk != nullptr))
- {
- tmpBlk = tmpBlk->Next();
- }
-
- // when tmpBlk is NULL that means startBlk is after endBlk
- // so there is no way to move bStart..bEnd within the try region
- if (tmpBlk == nullptr)
- {
- goto CANNOT_MOVE;
- }
- }
- }
- else
- {
- noway_assert(isRare == false);
-
- /* We'll search through the entire main method */
- startBlk = fgFirstBB;
- endBlk = fgEndBBAfterMainFunction();
- }
-
- // Calculate nearBlk and jumpBlk and then call fgFindInsertPoint()
- // to find our insertion block
- //
- {
- // If the set of blocks that we are moving ends with a BBJ_ALWAYS to
- // another [rarely run] block that comes after bPrev (forward branch)
- // then we can set up nearBlk to eliminate this jump sometimes
- //
- BasicBlock* nearBlk = nullptr;
- BasicBlock* jumpBlk = nullptr;
-
- if (bEnd->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && !bEnd->JumpsToNext() &&
- (!isRare || bEnd->GetTarget()->isRunRarely()) &&
- fgIsForwardBranch(bEnd, bEnd->GetTarget(), bPrev))
- {
- // Set nearBlk to be the block in [startBlk..endBlk]
- // such that nearBlk->NextIs(bEnd->JumpDest)
- // if no such block exists then set nearBlk to NULL
- nearBlk = startBlk;
- jumpBlk = bEnd;
- do
- {
- // We do not want to set nearBlk to bPrev
- // since then we will not move [bStart..bEnd]
- //
- if (nearBlk != bPrev)
- {
- // Check if nearBlk satisfies our requirement
- if (nearBlk->NextIs(bEnd->GetTarget()))
- {
- break;
- }
- }
-
- // Did we reach the endBlk?
- if (nearBlk == endBlk)
- {
- nearBlk = nullptr;
- break;
- }
-
- // advance nearBlk to the next block
- nearBlk = nearBlk->Next();
-
- } while (nearBlk != nullptr);
- }
-
- // if nearBlk is NULL then we set nearBlk to be the
- // first block that we want to insert after.
- if (nearBlk == nullptr)
- {
- if (bDest != nullptr)
- {
- // we want to insert after bDest
- nearBlk = bDest;
- }
- else
- {
- // we want to insert after bPrev
- nearBlk = bPrev;
- }
- }
-
- /* Set insertAfterBlk to the block which we will insert after. */
-
- insertAfterBlk =
- fgFindInsertPoint(bStart->bbTryIndex,
- true, // Insert in the try region.
- startBlk, endBlk, nearBlk, jumpBlk, bStart->bbWeight == BB_ZERO_WEIGHT);
- }
-
- /* See if insertAfterBlk is the same as where we started, */
- /* or if we could not find any insertion point */
-
- if ((insertAfterBlk == bPrev) || (insertAfterBlk == nullptr))
- {
- CANNOT_MOVE:;
- /* We couldn't move the blocks, so put everything back */
- /* relink [bStart .. bEnd] into the flow graph */
-
- bPrev->SetNext(bStart);
- if (!bEnd->IsLast())
- {
- bEnd->Next()->SetPrev(bEnd);
- }
-#ifdef DEBUG
- if (verbose)
- {
- if (bStart != bEnd)
- {
- printf("Could not relocate blocks (" FMT_BB " .. " FMT_BB ")\n", bStart->bbNum,
- bEnd->bbNum);
- }
- else
- {
- printf("Could not relocate block " FMT_BB "\n", bStart->bbNum);
- }
- }
-#endif // DEBUG
- continue;
- }
- }
- }
-
- noway_assert(insertAfterBlk != nullptr);
- noway_assert(bStartPrev != nullptr);
- noway_assert(bStartPrev != insertAfterBlk);
-
-#ifdef DEBUG
- movedBlocks = true;
-
- if (verbose)
- {
- const char* msg;
- if (bStart2 != nullptr)
- {
- msg = "hot";
- }
- else
- {
- if (isRare)
- {
- msg = "rarely run";
- }
- else
- {
- msg = "uncommon";
- }
- }
-
- printf("Relocated %s ", msg);
- if (bStart != bEnd)
- {
- printf("blocks (" FMT_BB " .. " FMT_BB ")", bStart->bbNum, bEnd->bbNum);
- }
- else
- {
- printf("block " FMT_BB, bStart->bbNum);
- }
-
- if (bPrev->KindIs(BBJ_COND))
- {
- printf(" by reversing conditional jump at " FMT_BB "\n", bPrev->bbNum);
- }
- else
- {
- printf("\n", bPrev->bbNum);
- }
- }
-#endif // DEBUG
-
- if (bPrev->KindIs(BBJ_COND))
- {
- /* Reverse the bPrev jump condition */
- Statement* const condTestStmt = bPrev->lastStmt();
- GenTree* const condTest = condTestStmt->GetRootNode();
-
- noway_assert(condTest->gtOper == GT_JTRUE);
- condTest->AsOp()->gtOp1 = gtReverseCond(condTest->AsOp()->gtOp1);
-
- FlowEdge* const trueEdge = bPrev->GetTrueEdge();
- FlowEdge* const falseEdge = bPrev->GetFalseEdge();
- bPrev->SetTrueEdge(falseEdge);
- bPrev->SetFalseEdge(trueEdge);
-
- // may need to rethread
- //
- if (fgNodeThreading == NodeThreading::AllTrees)
- {
- JITDUMP("Rethreading " FMT_STMT "\n", condTestStmt->GetID());
- gtSetStmtInfo(condTestStmt);
- fgSetStmtSeq(condTestStmt);
- }
-
- if (bStart2 != nullptr)
- {
- noway_assert(insertAfterBlk == bPrev);
- noway_assert(insertAfterBlk->NextIs(block));
- }
- }
-
- // If we are moving blocks that are at the end of a try or handler
- // we will need to shorten ebdTryLast or ebdHndLast
- //
- ehUpdateLastBlocks(bEnd, bStartPrev);
-
- // If we are moving blocks into the end of a try region or handler region
- // we will need to extend ebdTryLast or ebdHndLast so the blocks that we
- // are moving are part of this try or handler region.
- //
- for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
- {
- // Are we moving blocks to the end of a try region?
- if (HBtab->ebdTryLast == insertAfterBlk)
- {
- if (fStartIsInTry[XTnum])
- {
- // bStart..bEnd is in the try, so extend the try region
- fgSetTryEnd(HBtab, bEnd);
- }
- }
-
- // Are we moving blocks to the end of a handler region?
- if (HBtab->ebdHndLast == insertAfterBlk)
- {
- if (fStartIsInHnd[XTnum])
- {
- // bStart..bEnd is in the handler, so extend the handler region
- fgSetHndEnd(HBtab, bEnd);
- }
- }
- }
-
- /* We have decided to insert the block(s) after 'insertAfterBlk' */
- fgMoveBlocksAfter(bStart, bEnd, insertAfterBlk);
-
- if (bDest)
- {
- /* We may need to insert an unconditional branch after bPrev to bDest */
- fgConnectFallThrough(bPrev, bDest);
- }
- else
- {
- /* If bPrev falls through, we must insert a jump to block */
- fgConnectFallThrough(bPrev, block);
- }
-
- BasicBlock* bSkip = bEnd->Next();
-
- /* If bEnd falls through, we must insert a jump to bNext */
- fgConnectFallThrough(bEnd, bNext);
-
- if (bStart2 == nullptr)
- {
- /* If insertAfterBlk falls through, we are forced to */
- /* add a jump around the block(s) we just inserted */
- fgConnectFallThrough(insertAfterBlk, bSkip);
- }
- else
- {
- /* We may need to insert an unconditional branch after bPrev2 to bStart */
- fgConnectFallThrough(bPrev2, bStart);
- }
-
-#if DEBUG
- if (verbose)
- {
- printf("\nAfter this change in fgReorderBlocks the BB graph is:");
- fgDispBasicBlocks(verboseTrees);
- printf("\n");
- }
- fgVerifyHandlerTab();
-
- // Make sure that the predecessor lists are accurate
- if (expensiveDebugCheckLevel >= 2)
- {
- fgDebugCheckBBlist();
- }
-#endif // DEBUG
-
- // Set our iteration point 'block' to be the new bPrev->bbNext
- // It will be used as the next bPrev
- block = bPrev->Next();
-
- } // end of for loop(bPrev,block)
-
- const bool changed = movedBlocks || newRarelyRun || optimizedSwitches || optimizedBranches;
-
- if (changed)
- {
-#if DEBUG
- // Make sure that the predecessor lists are accurate
- if (expensiveDebugCheckLevel >= 2)
- {
- fgDebugCheckBBlist();
- }
-#endif // DEBUG
- }
-
- return changed;
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
//-----------------------------------------------------------------------------
// Compiler::ThreeOptLayout::EdgeCmp: Comparator for the 'cutPoints' priority queue.
// If 'left' has a bigger edge weight than 'right', 3-opt will consider it first.
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index c900ae003fe1dc..08ac83188ccf85 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -1463,6 +1463,7 @@ void Compiler::fgAddSyncMethodEnterExit()
// Initialize the new entry
+ newEntry->ebdID = impInlineRoot()->compEHID++;
newEntry->ebdHandlerType = EH_HANDLER_FAULT;
newEntry->ebdTryBeg = tryBegBB;
@@ -1963,13 +1964,12 @@ class MergedReturns
// True if any returns were impacted.
//
// Notes:
- // The goal is to set things up favorably for a reasonable layout without
- // putting too much burden on fgReorderBlocks; in particular, since that
- // method doesn't (currently) shuffle non-profile, non-rare code to create
- // fall-through and reduce gotos, this method places each const return
- // block immediately after its last predecessor, so that the flow from
- // there to it can become fallthrough without requiring any motion to be
- // performed by fgReorderBlocks.
+ // Prematurely optimizing the block layout is unnecessary.
+ // However, 'ReturnCountHardLimit' is small enough such that
+ // any throughput savings from skipping this pass are negated
+ // by the need to emit branches to these blocks in MinOpts.
+ // If we decide to increase the number of epilogues allowed,
+ // we should consider removing this pass.
//
bool PlaceReturns()
{
@@ -2808,6 +2808,58 @@ bool Compiler::fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast)
return false;
}
+//------------------------------------------------------------------------
+// fgSimpleLowerBswap16 : Optimization to remove CAST nodes from operands of small ops that depents on
+// lower bits only (currently only BSWAP16).
+// Example:
+// BSWAP16(CAST(x)) transforms to BSWAP16(x)
+//
+// Returns:
+// True if any changes were made; otherwise false.
+//
+// Notes:
+// This optimization could be done in morph, but it cannot because there are correctness
+// problems with NOLs (normalized-on-load locals) and how they are handled in VN.
+// Simply put, you cannot remove a CAST from CAST(LCL_VAR{nol}) in HIR.
+//
+// Because the optimization happens during rationalization, turning into LIR, it is safe to remove the CAST.
+//
+bool Compiler::fgSimpleLowerBswap16(LIR::Range& range, GenTree* op)
+{
+ assert(op->OperIs(GT_BSWAP16));
+
+ if (opts.OptimizationDisabled())
+ return false;
+
+ // When operand is an integral cast
+ // When both source and target sizes are at least the operation size
+ bool madeChanges = false;
+
+ if (op->gtGetOp1()->OperIs(GT_CAST))
+ {
+ GenTreeCast* op1 = op->gtGetOp1()->AsCast();
+
+ if (!op1->gtOverflow() && (genTypeSize(op1->CastToType()) >= 2) &&
+ genActualType(op1->CastFromType()) == TYP_INT)
+ {
+ // This cast does not affect the lower 16 bits. It can be removed.
+ op->AsOp()->gtOp1 = op1->CastOp();
+ range.Remove(op1);
+ madeChanges = true;
+ }
+ }
+
+#ifdef DEBUG
+ if (madeChanges)
+ {
+ JITDUMP("Lower - Downcast of Small Op %s:\n", GenTree::OpName(op->OperGet()));
+ DISPTREE(op);
+ }
+#endif // DEBUG
+
+ return madeChanges;
+}
+
//------------------------------------------------------------------------------
// fgGetDomSpeculatively: Try determine a more accurate dominator than cached bbIDom
//
@@ -6140,14 +6192,30 @@ bool FlowGraphNaturalLoop::CanDuplicateWithEH(INDEBUG(const char** reason))
// Check if this is an "outermost" try within the loop.
// If so, we have more checking to do later on.
//
- const bool headerInTry = header->hasTryIndex();
- unsigned blockIndex = block->getTryIndex();
- unsigned outermostBlockIndex = comp->ehTrueEnclosingTryIndex(blockIndex);
+ bool const headerIsInTry = header->hasTryIndex();
+ unsigned const blockTryIndex = block->getTryIndex();
+ unsigned const enclosingTryIndex = comp->ehTrueEnclosingTryIndex(blockTryIndex);
- if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) ||
- (!headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX)))
+ if ((headerIsInTry && (enclosingTryIndex == header->getTryIndex())) ||
+ (!headerIsInTry && (enclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)))
{
- tryRegionsToClone.Push(block);
+ // When we clone a try we also clone its handler.
+ //
+ // This try may be enclosed in a handler whose try begin is in the loop.
+ // If so we'll clone this try when we clone (the handler of) that try.
+ //
+ bool isInHandlerOfInLoopTry = false;
+ if (block->hasHndIndex())
+ {
+ unsigned const enclosingHndIndex = block->getHndIndex();
+ BasicBlock* const associatedTryBeg = comp->ehGetDsc(enclosingHndIndex)->ebdTryBeg;
+ isInHandlerOfInLoopTry = this->ContainsBlock(associatedTryBeg);
+ }
+
+ if (!isInHandlerOfInLoopTry)
+ {
+ tryRegionsToClone.Push(block);
+ }
}
}
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 86357b0536c09b..022c750bdc89ea 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -12333,7 +12333,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
#if defined(FEATURE_EH_WINDOWS_X86)
case GT_END_LFIN:
- printf(" endNstLvl=%d", tree->AsVal()->gtVal1);
+ printf(" ehID=%d", tree->AsVal()->gtVal1);
break;
#endif // FEATURE_EH_WINDOWS_X86
@@ -18412,9 +18412,14 @@ unsigned GenTreeVecCon::ElementCount(unsigned simdSize, var_types simdBaseType)
return simdSize / genTypeSize(simdBaseType);
}
-bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const
+bool Compiler::IsValidForShuffle(
+ GenTree* indices, unsigned simdSize, var_types simdBaseType, bool* canBecomeValid, bool isShuffleNative) const
{
#if defined(TARGET_XARCH)
+ if (canBecomeValid != nullptr)
+ {
+ *canBecomeValid = false;
+ }
size_t elementSize = genTypeSize(simdBaseType);
size_t elementCount = simdSize / elementSize;
@@ -18426,45 +18431,10 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t
// it's likely not worth it overall given that IsHardwareAccelerated reports false
return false;
}
- else if ((varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) ||
- (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL)))
- {
- bool crossLane = false;
-
- for (size_t index = 0; index < elementCount; index++)
- {
- uint64_t value = vecCon->GetIntegralVectorConstElement(index, simdBaseType);
-
- if (value >= elementCount)
- {
- continue;
- }
-
- if (index < (elementCount / 2))
- {
- if (value >= (elementCount / 2))
- {
- crossLane = true;
- break;
- }
- }
- else if (value < (elementCount / 2))
- {
- crossLane = true;
- break;
- }
- }
-
- if (crossLane)
- {
- // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort
- return false;
- }
- }
}
else if (simdSize == 64)
{
- if (varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI))
+ if (varTypeIsByte(simdBaseType) && (!compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI)))
{
// TYP_BYTE, TYP_UBYTE need AVX512VBMI.
return false;
@@ -18474,14 +18444,43 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t
{
assert(simdSize == 16);
- if (varTypeIsSmall(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSSE3))
+ if (varTypeIsSmall(simdBaseType) && (!compOpportunisticallyDependsOn(InstructionSet_SSSE3)))
{
// TYP_BYTE, TYP_UBYTE, TYP_SHORT, and TYP_USHORT need SSSE3 to be able to shuffle any operation
return false;
}
+
+ bool isVariableShuffle = !indices->IsCnsVec();
+ if ((!isVariableShuffle) && isShuffleNative)
+ {
+ // ShuffleNative with constant indices with 1 or more out of range indices is emitted as variable indices.
+ for (size_t index = 0; index < elementCount; index++)
+ {
+ uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType);
+ if (value >= elementCount)
+ {
+ isVariableShuffle = true;
+ break;
+ }
+ }
+ }
+ if (isVariableShuffle && (!compOpportunisticallyDependsOn(InstructionSet_SSSE3)))
+ {
+ // the variable implementation for Vector128 Shuffle always needs SSSE3
+ // however, this can become valid later if it becomes constant
+ if (canBecomeValid != nullptr)
+ {
+ *canBecomeValid = true;
+ }
+ return false;
+ }
}
#endif // TARGET_XARCH
+ if (canBecomeValid != nullptr)
+ {
+ *canBecomeValid = true;
+ }
return true;
}
@@ -20260,7 +20259,7 @@ var_types GenTreeJitIntrinsic::GetSimdBaseType() const
// isCommutativeHWIntrinsic: Checks if the intrinsic is commutative
//
// Return Value:
-// true if the intrisic is commutative
+// true if the intrinsic is commutative
//
bool GenTree::isCommutativeHWIntrinsic() const
{
@@ -20422,6 +20421,9 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
@@ -21568,7 +21570,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
#if defined(FEATURE_MASKED_HW_INTRINSICS)
//------------------------------------------------------------------------
-// gtNewSimdCvtMaskToVectorNode: Convert a HW instrinsic mask node to a vector
+// gtNewSimdCvtMaskToVectorNode: Convert a HW intrinsic mask node to a vector
//
// Arguments:
// type -- The type of the node to convert to
@@ -21993,7 +21995,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type,
#if defined(FEATURE_MASKED_HW_INTRINSICS)
//------------------------------------------------------------------------
-// gtNewSimdCvtVectorToMaskNode: Convert a HW instrinsic vector node to a mask
+// gtNewSimdCvtVectorToMaskNode: Convert a HW intrinsic vector node to a mask
//
// Arguments:
// type -- The type of the mask to produce.
@@ -22747,16 +22749,6 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type,
}
#if defined(TARGET_XARCH)
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !op1->IsIntegralConst())
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- unreached();
- }
-#endif // TARGET_X86
-
if (simdSize == 64)
{
hwIntrinsicID = NI_Vector512_Create;
@@ -22860,16 +22852,6 @@ GenTree* Compiler::gtNewSimdCreateScalarNode(var_types type,
}
#if defined(TARGET_XARCH)
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !op1->IsIntegralConst())
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- unreached();
- }
-#endif // TARGET_X86
-
if (simdSize == 32)
{
hwIntrinsicID = NI_Vector256_CreateScalar;
@@ -23005,16 +22987,6 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type,
}
#if defined(TARGET_XARCH)
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !op1->IsIntegralConst())
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- unreached();
- }
-#endif // TARGET_X86
-
if (simdSize == 32)
{
hwIntrinsicID = NI_Vector256_CreateScalarUnsafe;
@@ -23051,7 +23023,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type,
GenTree* Compiler::gtNewSimdCreateSequenceNode(
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
{
- // This effectively doees: (Indices * op2) + Create(op1)
+ // This effectively does: (Indices * op2) + Create(op1)
//
// When both op2 and op1 are constant we can fully fold this to a constant. Additionally,
// if only op2 is a constant we can simplify the computation by a lot. However, if only op1
@@ -23383,14 +23355,7 @@ GenTree* Compiler::gtNewSimdGetElementNode(
assert(varTypeIsArithmetic(simdBaseType));
#if defined(TARGET_XARCH)
- bool useToScalar = op2->IsIntegralConst(0);
-
-#if defined(TARGET_X86)
- // We handle decomposition via GetElement for simplicity
- useToScalar &= !varTypeIsLong(simdBaseType);
-#endif // TARGET_X86
-
- if (useToScalar)
+ if (op2->IsIntegralConst(0))
{
return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize);
}
@@ -25463,8 +25428,24 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType
return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
}
-GenTree* Compiler::gtNewSimdShuffleNode(
- var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+//------------------------------------------------------------------------
+// gtNewSimdShuffleVariableNode: Creates a new simd shuffle node (with variable indices, or a case isn't handled in
+// gtNewSimdShuffleNode for ShuffleUnsafe with out of bounds indices) - this is a helper function for
+// gtNewSimdShuffleNode & should just be invoked by it indirectly, instead of other callers using it
+//
+// Arguments:
+// type -- The type of the node
+// op1 -- The values to shuffle
+// op2 -- The indices to pick from (variable)
+// simdBaseJitType -- The base jit type of the node
+// simdSize -- The simd size of the node
+// isShuffleNative -- Whether we're making a ShuffleNative node vs a Shuffle one
+//
+// Return Value:
+// The shuffle node
+//
+GenTree* Compiler::gtNewSimdShuffleVariableNode(
+ var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative)
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -25474,166 +25455,948 @@ GenTree* Compiler::gtNewSimdShuffleNode(
assert(op1 != nullptr);
assert(op1->TypeIs(type));
+ var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(op2 != nullptr);
assert(op2->TypeIs(type));
- assert(op2->IsCnsVec());
+ assert((!op2->IsCnsVec()) || isShuffleNative);
- var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
- assert(varTypeIsArithmetic(simdBaseType));
+ GenTree* retNode = nullptr;
+ GenTree* cnsNode = nullptr;
+
+ size_t elementSize = genTypeSize(simdBaseType);
+ size_t elementCount = simdSize / elementSize;
- if (op2->IsVectorAllBitsSet())
+ // duplicate operand 2 for non-ShuffleNative implementation later
+ // (also, on arm64, byte ShuffleNative is same as Shuffle)
+ GenTree* op2DupSafe = nullptr;
+#if defined(TARGET_XARCH)
+ if (!isShuffleNative)
+#elif defined(TARGET_ARM64)
+ if ((!isShuffleNative) && (elementSize > 1))
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
{
- // AllBitsSet represents indices that are always "out of range" which means zero should be
- // selected for every element. We can special-case this down to just returning a zero node
- return gtNewZeroConNode(type);
+ op2DupSafe = fgMakeMultiUse(&op2);
}
- if (op2->IsVectorZero())
+#if defined(TARGET_XARCH)
+ // on xarch, signed comparison is cheaper, so whenever we are able to use it in the
+ // result & (indices < elementCount) step for Shuffle, we do. Specifically, we are
+ // able to use it when the top bit causes zeroing (then we can compare indices as
+ // if they were signed, since negative cases are already handled).
+ bool canUseSignedComparisonHint = false;
+
+ // TODO-XARCH-CQ: If we have known set/unset bits for the indices, we could further optimise many cases
+ // below.
+
+ bool isV512Supported = false;
+ if (simdSize == 64)
{
- // TODO-XARCH-CQ: Zero represents indices that select the first element of op1 each time. We can simplify
- // this down to basically a broadcast equivalent.
+ if (elementSize == 1)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512VBMI));
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_PermuteVar64x8, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else if (elementSize == 2)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW));
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_PermuteVar32x16, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else if (elementSize == 4)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar16x32, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else
+ {
+ assert(elementSize == 8);
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar8x64, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
}
+ else if ((elementSize == 1) && (simdSize == 16))
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3));
- GenTree* retNode = nullptr;
- GenTreeIntConCommon* cnsNode = nullptr;
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize);
- size_t elementSize = genTypeSize(simdBaseType);
- size_t elementCount = simdSize / elementSize;
+ // high bit on index gives 0 already
+ canUseSignedComparisonHint = true;
+ }
+ else if ((elementSize == 1) && (simdSize == 32) &&
+ compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512VBMI_VL))
+ {
+ NamedIntrinsic intrinsic = isV512Supported ? NI_AVX512VBMI_VL_PermuteVar32x8 : NI_AVX10v1_PermuteVar32x8;
-#if defined(TARGET_XARCH)
- uint8_t control = 0;
- bool crossLane = false;
- bool needsZero = varTypeIsSmall(simdBaseType) && (simdSize <= 16);
- uint64_t value = 0;
- simd_t vecCns = {};
- simd_t mskCns = {};
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else if ((elementSize == 2) && compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL))
+ {
+ assert((simdSize == 16) || (simdSize == 32));
+ NamedIntrinsic intrinsic;
+ if (isV512Supported)
+ {
+ intrinsic = (simdSize == 16) ? NI_AVX512BW_VL_PermuteVar8x16 : NI_AVX512BW_VL_PermuteVar16x16;
+ }
+ else
+ {
+ intrinsic = (simdSize == 16) ? NI_AVX10v1_PermuteVar8x16 : NI_AVX10v1_PermuteVar16x16;
+ }
- for (size_t index = 0; index < elementCount; index++)
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else if ((elementSize == 4) && ((simdSize == 32) || compOpportunisticallyDependsOn(InstructionSet_AVX)))
{
- value = op2->GetIntegralVectorConstElement(index, simdBaseType);
+ assert((simdSize == 16) || (simdSize == 32));
- if (value < elementCount)
+ if (simdSize == 32)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
+
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize);
+ }
+ }
+ else if ((elementSize == 8) && (simdSize == 32) &&
+ compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL))
+ {
+ NamedIntrinsic intrinsic = isV512Supported ? NI_AVX512F_VL_PermuteVar4x64 : NI_AVX10v1_PermuteVar4x64;
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else if ((elementSize == 8) && (simdSize == 16) &&
+ compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL))
+ {
+ GenTree* op1Copy = fgMakeMultiUse(&op1); // just use op1 again for the other variable
+ NamedIntrinsic intrinsic = isV512Supported ? NI_AVX512F_VL_PermuteVar2x64x2 : NI_AVX10v1_PermuteVar2x64x2;
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, op1Copy, intrinsic, simdBaseJitType, simdSize);
+ }
+ else
+ {
+ assert(((elementSize == 1) && (simdSize == 32)) || (elementSize == 2) ||
+ ((elementSize == 4) && (simdSize == 16)) || (elementSize == 8));
+
+ if ((elementSize == 8) && ((simdSize == 32) || compOpportunisticallyDependsOn(InstructionSet_AVX)))
{
+ assert((simdSize == 16) || (simdSize == 32));
+ if (simdSize == 32)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+ }
+ else
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
+ }
+
+ // the below is implemented for integral types
+ if (varTypeIsFloating(simdBaseType))
+ {
+ assert(elementSize == 8);
+ simdBaseJitType = CORINFO_TYPE_LONG;
+ }
+
+ // shift all indices to the left by 1 (long to int index, first step of converting long->int indices)
+ cnsNode = gtNewIconNode(1, TYP_INT);
if (simdSize == 32)
{
- // Most of the 256-bit shuffle/permute instructions operate as if
- // the inputs were 2x 128-bit values. If the selected indices cross
- // the respective 128-bit "lane" we may need to specialize the codegen
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_ShiftLeftLogical, simdBaseJitType, simdSize);
+ }
+ else
+ {
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_ShiftLeftLogical, simdBaseJitType, simdSize);
+ }
+
+ // the below are implemented with float/int/uint
+ simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
+ if (varTypeIsFloating(simdBaseType))
+ {
+ simdBaseJitType = CORINFO_TYPE_FLOAT;
+ }
- if (index < (elementCount / 2))
+ // shuffle & manipulate the long indices to int indices (e.g., 3 2 1 0 -> 6 7 4 5 2 3 0 1)
+ unsigned immediate = 0b10100000;
+ cnsNode = gtNewIconNode(immediate);
+ if (simdSize == 32)
+ {
+ if (varTypeIsFloating(simdBaseType))
+ {
+ GenTree* op2Dup = fgMakeMultiUse(&op2);
+ op2 =
+ gtNewSimdHWIntrinsicNode(type, op2, op2Dup, cnsNode, NI_AVX_Shuffle, simdBaseJitType, simdSize);
+ }
+ else
+ {
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+ }
+ }
+ else
+ {
+ if (varTypeIsFloating(simdBaseType))
{
- crossLane |= (value >= (elementCount / 2));
+ GenTree* op2Dup = fgMakeMultiUse(&op2);
+ op2 =
+ gtNewSimdHWIntrinsicNode(type, op2, op2Dup, cnsNode, NI_SSE_Shuffle, simdBaseJitType, simdSize);
}
else
{
- crossLane |= (value < (elementCount / 2));
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_Shuffle, simdBaseJitType, simdSize);
}
}
- // Setting the control for byte/sbyte and short/ushort is unnecessary
- // and will actually compute an incorrect control word. But it simplifies
- // the overall logic needed here and will remain unused.
+ simd_t orCns = {};
+ for (size_t index = 0; index < simdSize / 4; index++)
+ {
+ orCns.u32[index] = index & 1;
+ }
- control |= (value << (index * (elementCount / 2)));
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = orCns;
- // When Ssse3 is supported, we may need vecCns to accurately select the relevant
- // bytes if some index is outside the valid range. Since x86/x64 is little-endian
- // we can simplify this down to a for loop that scales the value and selects count
- // sequential bytes.
+ op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize);
- for (uint32_t i = 0; i < elementSize; i++)
+ // perform the shuffle with our int indices
+ if (simdSize == 32)
{
- vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
-
- // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet
- // so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
-
- mskCns.u8[(index * elementSize) + i] = 0xFF;
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize);
+ retNode->SetReverseOp();
+ }
+ else
+ {
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize);
}
}
- else
+ else if (simdSize == 32)
{
- needsZero = true;
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+ assert(elementSize <= 2);
- // When Ssse3 is supported, we may need vecCns to accurately select the relevant
- // bytes if some index is outside the valid range. We can do this by just zeroing
- // out each byte in the element. This only requires the most significant bit to be
- // set, but we use 0xFF instead since that will be the equivalent of AllBitsSet
+ // the idea is the following (for bytes, short indices are first converted to byte indices):
+ // 1. we have vector, and we create a vectorSwapped from it (for which the 128-bit lanes are swapped)
+ // 2. we then shuffle each using Avx2.Shuffle
+ // 3. we now have vector and vectorSwapped shuffled with Avx2.Shuffle - which only shuffles within the lane
+ // 4. for Shuffle, invalid indices are explicitly zeroed later, so no need to worry about anything outside
+ // [0, 31], and for ShuffleNative, we don't guarantee any particular index (or 0 value) is selected.
+ // 5. since we only care about [0, 31], notably we have that for each element either vector or vectorSwapped
+ // (not both) will have the value we actually want, since one is effectively index A = i & 0x0F, and
+ // the other is effectively B = (i & 0x0F) | 0x10. (vector is A for left lane and B for right lane,
+ // and vectorSwapped is B for left lane and A for right lane)
+ // 6. we can use a conditional select to get the appropriate value if we know what mask to use.
+ // 7. we can use the following mask:
+ // (indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10))) > V256.Create((byte)0x0F)
+ // since this detects whether the index value is in the same lane as V256.Indices
+ // would be (which we know we can always use vector for). this is because it normalises the 0x10 bit
+ // to mean '0 = in vector, 1 = in vectorSwapped', and then we can use > 0x0F to detect when this is
+ // the case (we use > on sbyte, since it is the primitive operation on x86/x64 avx2 hardware).
+ // 8. for Shuffle, we explicitly normalise the out of range indices later, so we are done.
- for (uint32_t i = 0; i < elementSize; i++)
+ // high bit on index gives 0 already
+ if (elementSize == 1)
{
- vecCns.u8[(index * elementSize) + i] = 0xFF;
-
- // When Ssse3 is not supported, we need to adjust the constant to be Zero
- // so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
-
- mskCns.u8[(index * elementSize) + i] = 0x00;
+ canUseSignedComparisonHint = true;
}
- }
- }
- if (simdSize == 32)
- {
- assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
- bool isV512Supported = false;
- if ((varTypeIsByte(simdBaseType) &&
- !compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512VBMI_VL)) ||
- (varTypeIsShort(simdBaseType) &&
- !compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)))
- {
- if (crossLane)
- {
- // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort
- unreached();
- }
+ // declare required clones of op2
+ GenTree *op2Dup1, *op2Dup2;
- // If we aren't crossing lanes, then we can decompose the byte/sbyte
- // and short/ushort operations into 2x 128-bit operations
+ // if we have elementSize > 1, we need to convert op2 (short indices) to byte indices
+ if (elementSize > 1)
+ {
+ // shift all indices to the left by tzcnt(size) = 1
+ cnsNode = gtNewIconNode(1, TYP_INT);
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_ShiftLeftLogical, simdBaseJitType, simdSize);
- // We want to build what is essentially the following managed code:
- // var op1Lower = op1.GetLower();
- // op1Lower = Ssse3.Shuffle(op1Lower, Vector128.Create(...));
- //
- // var op1Upper = op1.GetUpper();
- // op1Upper = Ssse3.Shuffle(op1Upper, Vector128.Create(...));
- //
- // return Vector256.Create(op1Lower, op1Upper);
+ // the below are implemented with byte/sbyte
+ simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
- simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
+ // shuffle with a pattern like 0 0 2 2 4 4 6 6 ... 0 0 2 2 ... (for shorts)
+ // (note: the 0x10 bit is ignored for Avx2.Shuffle)
+ simd_t shufCns = {};
+ shufCns.u64[0] = 0x0606040402020000;
+ shufCns.u64[1] = 0x0E0E0C0C0A0A0808;
+ shufCns.u64[2] = 0x0606040402020000;
+ shufCns.u64[3] = 0x0E0E0C0C0A0A0808;
- GenTree* op1Dup = fgMakeMultiUse(&op1);
- GenTree* op1Lower = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize);
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = shufCns;
- op2 = gtNewVconNode(TYP_SIMD16);
- op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
- op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16);
+ // or every second index with 1 (short)
+ simd_t orCns = {};
+ for (size_t index = 0; index < simdSize; index++)
+ {
+ orCns.u8[index] = static_cast<uint8_t>(index & (elementSize - 1));
+ }
- GenTree* op1Upper = gtNewSimdGetUpperNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize);
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = orCns;
- op2 = gtNewVconNode(TYP_SIMD16);
- op2->AsVecCon()->gtSimd16Val = vecCns.v128[1];
+ op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize);
- op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16);
+ // create required clones of op2
+ op2Dup1 = fgMakeMultiUse(&op2);
+ op2Dup2 = gtCloneExpr(op2Dup1);
+ }
- return gtNewSimdWithUpperNode(type, op1Lower, op1Upper, simdBaseJitType, simdSize);
- }
+ else
+ {
+ // create required clones of op2
+ op2Dup1 = (op2DupSafe != nullptr) ? gtCloneExpr(op2DupSafe) : fgMakeMultiUse(&op2);
+ op2Dup2 = gtCloneExpr(op2Dup1);
+ }
- if (elementSize == 4)
+ // swap the low and high 128-bit lanes
+ // Vector256<byte> swap = Avx2.Permute2x128(vector, vector, 0b00000001);
+ GenTree* swap;
+ if (!op1->IsCnsVec())
+ {
+ GenTree* op1Dup1 = fgMakeMultiUse(&op1);
+ GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
+
+ uint8_t control = 1;
+ cnsNode = gtNewIconNode(control, TYP_INT);
+ swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, simdBaseJitType,
+ simdSize);
+ }
+ else
+ {
+ // if we have a constant, keep it constant
+ GenTree* op1Dup1 = fgMakeMultiUse(&op1);
+ swap = op1Dup1;
+
+ simd_t* cnsPtr = &op1Dup1->AsVecCon()->gtSimdVal;
+ std::swap(cnsPtr->u64[0], cnsPtr->u64[2]);
+ std::swap(cnsPtr->u64[1], cnsPtr->u64[3]);
+ }
+
+ // shuffle with both the normal and swapped values
+ // Vector256<byte> shuf1 = Avx2.Shuffle(vector, indices);
+ // Vector256<byte> shuf2 = Avx2.Shuffle(swap, indices);
+ GenTree* shuf1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+ GenTree* shuf2 = gtNewSimdHWIntrinsicNode(type, swap, op2Dup1, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+
+ // get the indices, and xor the cross-lane bit on the high 128-bit lane part of indices.
+ // V256 indicesXord = indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10)));
+ simd_t xorCns = {};
+ xorCns.u64[0] = 0;
+ xorCns.u64[1] = 0;
+ xorCns.u64[2] = 0x1010101010101010;
+ xorCns.u64[3] = 0x1010101010101010;
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = xorCns;
+
+ GenTree* indicesXord = gtNewSimdBinOpNode(GT_XOR, type, op2Dup2, cnsNode, simdBaseJitType, simdSize);
+
+ // compare our modified indices to 0x0F (highest value not swapping lane), we get 0xFF when we are swapping
+ // lane and 0x00 otherwise. we will also get "swapping lane" also when index is more than 32
+ // (but no high bit), but this is normalised later for Shuffle, and acceptable for ShuffleNative.
+ // V256 selection = Avx2.CompareGreaterThan(indicesXord.AsSByte(), V256.Create((sbyte)0x0F)).AsByte();
+ simd_t comparandCnd = {};
+ comparandCnd.u64[0] = 0x0F0F0F0F0F0F0F0F;
+ comparandCnd.u64[1] = 0x0F0F0F0F0F0F0F0F;
+ comparandCnd.u64[2] = 0x0F0F0F0F0F0F0F0F;
+ comparandCnd.u64[3] = 0x0F0F0F0F0F0F0F0F;
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = comparandCnd;
+ GenTree* selection = gtNewSimdCmpOpNode(GT_GT, type, indicesXord, cnsNode, CORINFO_TYPE_BYTE, simdSize);
+
+ // blend our two shuffles based on whether each element swaps lanes or not
+ // return Avx2.BlendVariable(shuf1, shuf2, selection);
+ retNode = gtNewSimdHWIntrinsicNode(type, shuf1, shuf2, selection, NI_AVX2_BlendVariable, simdBaseJitType,
+ simdSize);
+ }
+ else
{
- for (uint32_t i = 0; i < elementCount; i++)
+ assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3));
+ assert(simdSize == 16);
+ assert(elementSize > 1);
+
+ // we want to convert our non-byte indices to byte indices,
+ // e.g., 3 2 1 0 (int) -> 12 13 14 15 8 9 10 11 4 5 6 7 0 1 2 3 (byte)
+
+ // the below is implemented for integral types
+ if (varTypeIsFloating(simdBaseType))
{
- vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize);
+ if (elementSize == 4)
+ {
+ simdBaseJitType = CORINFO_TYPE_UINT;
+ }
+ else
+ {
+ assert(elementSize == 8);
+ simdBaseJitType = CORINFO_TYPE_ULONG;
+ }
}
- op2 = gtNewVconNode(type);
- op2->AsVecCon()->gtSimdVal = vecCns;
+ // shift all indices to the left by tzcnt(size)
+ cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast<uint32_t>(elementSize)), TYP_INT);
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_ShiftLeftLogical, simdBaseJitType, simdSize);
- // swap the operands to match the encoding requirements
- retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize);
+ // the below are implemented with byte/sbyte
+ simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
+
+ // we need to convert the indices to byte indices
+ // shuffle with a pattern like 0 0 2 2 4 4 6 6 ... (for short, and similar for larger)
+
+ simd_t shufCns = {};
+ for (size_t index = 0; index < elementCount; index++)
+ {
+ for (size_t i = 0; i < elementSize; i++)
+ {
+ shufCns.u8[(index * elementSize) + i] = static_cast<uint8_t>(index * elementSize);
+ }
+ }
+
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = shufCns;
+
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSSE3_Shuffle, simdBaseJitType, simdSize);
+
+ // or the relevant bits
+
+ simd_t orCns = {};
+ for (size_t index = 0; index < simdSize; index++)
+ {
+ orCns.u8[index] = static_cast<uint8_t>(index & (elementSize - 1));
+ }
+
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = orCns;
+
+ op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize);
+
+ // apply normal byte shuffle now that we've converted it
+
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize);
+ }
+ }
+#elif defined(TARGET_ARM64)
+ NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup;
+
+ if (simdSize == 16)
+ {
+ lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup;
+ }
+
+ // fix-up indices for non-byte sized element types:
+ // if we have short / int / long, then we want to VectorTableLookup the least-significant byte to all bytes of that
+ // index element, and then shift left by the applicable amount, then or on the bits for the elements
+ // if it's not ShuffleNative, we also need to then fix-up the out-of-range indices (only for non-byte though)
+ // e.g., 3 2 1 0 (int) -> 12 13 14 15 8 9 10 11 4 5 6 7 0 1 2 3 (byte)
+ if (elementSize > 1)
+ {
+ // AdvSimd.ShiftLeftLogical is only valid on integral types, excluding Vector128
+ if (varTypeIsFloating(simdBaseType))
+ {
+ if (elementSize == 4)
+ {
+ simdBaseJitType = CORINFO_TYPE_INT;
+ }
+ else
+ {
+ assert(elementSize == 8);
+ simdBaseJitType = CORINFO_TYPE_LONG;
+ }
+ }
+ if ((simdSize == 16) && (simdBaseJitType == CORINFO_TYPE_INT))
+ {
+ simdBaseJitType = CORINFO_TYPE_UINT;
+ }
+
+ // shift all indices to the left by tzcnt(size)
+ cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast<uint32_t>(elementSize)), TYP_INT);
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AdvSimd_ShiftLeftLogical, simdBaseJitType, simdSize);
+
+ // VectorTableLookup is only valid on byte/sbyte
+ simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
+
+ simd_t shufCns = {};
+ for (size_t index = 0; index < elementCount; index++)
+ {
+ for (size_t i = 0; i < elementSize; i++)
+ {
+ shufCns.u8[(index * elementSize) + i] = static_cast<uint8_t>(index * elementSize);
+ }
+ }
+
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = shufCns;
+
+ op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, lookupIntrinsic, simdBaseJitType, simdSize);
+
+ // or the relevant bits
+ simd_t orCns = {};
+ for (size_t index = 0; index < simdSize; index++)
+ {
+ orCns.u8[index] = static_cast<uint8_t>(index & (elementSize - 1));
+ }
+
+ cnsNode = gtNewVconNode(type);
+ cnsNode->AsVecCon()->gtSimdVal = orCns;
+
+ op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize);
+ }
+
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize);
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+ assert(retNode != nullptr);
+
+#if defined(TARGET_XARCH)
+ if (!isShuffleNative)
+#elif defined(TARGET_ARM64)
+ if ((!isShuffleNative) && (elementSize > 1))
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+ {
+ // we need to ensure indices larger than elementCount become 0 for larger element types
+
+ assert(op2DupSafe != nullptr);
+
+ // get the CorInfoType used for the index comparison
+ CorInfoType corType = CORINFO_TYPE_UBYTE;
+ if (elementSize == 2)
+ {
+ corType = CORINFO_TYPE_USHORT;
+ }
+ else if (elementSize == 4)
+ {
+ corType = CORINFO_TYPE_UINT;
+ }
+ else if (elementSize == 8)
+ {
+ corType = CORINFO_TYPE_ULONG;
+ }
+
+ // track whether we need to xor the high bit from the comparand
+ bool subComparandNode = false;
+
+#if defined(TARGET_XARCH)
+ // check if we have hardware accelerated unsigned comparison
+ bool hardwareAcceleratedUnsignedComparison =
+ (simdSize == 64) ||
+ ((elementSize < 4) && compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) ||
+ ((elementSize >= 4) && compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL));
+
+ // if the hardware doesn't support direct unsigned comparison, we attempt to use signed comparison
+ if (!hardwareAcceleratedUnsignedComparison)
+ {
+ corType = CORINFO_TYPE_BYTE;
+ if (elementSize == 2)
+ {
+ corType = CORINFO_TYPE_SHORT;
+ }
+ else if (elementSize == 4)
+ {
+ corType = CORINFO_TYPE_INT;
+ }
+ else if (elementSize == 8)
+ {
+ corType = CORINFO_TYPE_LONG;
+ }
+
+ // if we can't use signed comparison for free, update the comparand and op2DupSafe appropriately.
+ // doing this manually allows the comparand to still be a constant.
+ if (!canUseSignedComparisonHint)
+ {
+ subComparandNode = true;
+ uint64_t subtractionValue = static_cast<uint64_t>(1) << (elementSize * 8 - 1);
+ GenTree* subtraction =
+ gtNewSimdCreateBroadcastNode(type, gtNewLconNode(subtractionValue), corType, simdSize);
+
+ op2DupSafe = gtNewSimdBinOpNode(GT_SUB, type, op2DupSafe, subtraction, corType, simdSize);
+ }
+ }
+#endif
+
+ // create the comparand node
+ uint64_t comparandValue = static_cast<uint64_t>(elementCount);
+ if (subComparandNode)
+ {
+ uint64_t subtraction = (uint64_t)1 << (elementSize * 8 - 1);
+ comparandValue -= subtraction;
+ }
+ GenTree* comparand = gtNewSimdCreateBroadcastNode(type, gtNewLconNode(comparandValue), corType, simdSize);
+
+ assert(genTypeSize(JitType2PreciseVarType(corType)) == elementSize);
+
+ // create the mask node (op2 < comparand), and the result node (mask & nativeResult)
+ GenTree* mask = gtNewSimdCmpOpNode(GT_LT, type, op2DupSafe, comparand, corType, simdSize);
+ retNode = gtNewSimdBinOpNode(GT_AND, type, retNode, mask, simdBaseJitType, simdSize);
+ }
+ else
+ {
+ assert(op2DupSafe == nullptr);
+ }
+
+ return retNode;
+}
+
+//------------------------------------------------------------------------
+// gtNewSimdShuffleNode: Creates a new simd shuffle node
+//
+// Arguments:
+// type -- The type of the node
+// op1 -- The values to shuffle
+// op2 -- The indices to pick from
+// simdBaseJitType -- The base jit type of the node
+// simdSize -- The simd size of the node
+// isShuffleNative -- Whether we're making a ShuffleNative node vs a Shuffle one
+//
+// Return Value:
+// The shuffle node
+//
+GenTree* Compiler::gtNewSimdShuffleNode(
+ var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative)
+{
+ assert(IsBaselineSimdIsaSupportedDebugOnly());
+
+ assert(varTypeIsSIMD(type));
+ assert(getSIMDTypeForSize(simdSize) == type);
+
+ assert(op1 != nullptr);
+ assert(op1->TypeIs(type));
+
+ assert(op2 != nullptr);
+ assert(op2->TypeIs(type));
+
+ // If op2 is not constant, call into the gtNewSimdShuffleVariableNode routine
+ if (!op2->IsCnsVec())
+ {
+ return gtNewSimdShuffleVariableNode(type, op1, op2, simdBaseJitType, simdSize, isShuffleNative);
+ }
+
+ var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+ assert(varTypeIsArithmetic(simdBaseType));
+
+ size_t elementSize = genTypeSize(simdBaseType);
+ size_t elementCount = simdSize / elementSize;
+
+ // For ShuffleNative, delegate to the variable implementation to get the same behaviour for
+ // ShuffleNative with constant vs variable indices for free.
+ // We want ShuffleNative to be at least as good as Shuffle (at least in non out-of-range cases),
+ // so if we have all values in range, then just treat it like Shuffle.
+ // We may as well also track whether we have the identity shuffle and all out-of-range here.
+ bool gotInvalidIndex = false;
+ bool hasIdentityShuffle = true;
+ bool allOutOfRange = true;
+ for (size_t index = 0; index < elementCount; index++)
+ {
+ uint64_t value = op2->GetIntegralVectorConstElement(index, simdBaseType);
+ if (value >= elementCount)
+ {
+ gotInvalidIndex = true;
+ }
+ else
+ {
+ allOutOfRange = false;
+ }
+ if (value != static_cast<uint64_t>(index))
+ {
+ hasIdentityShuffle = false;
+ }
+ }
+ if (isShuffleNative && gotInvalidIndex)
+ {
+ // Call variable implementation.
+ return gtNewSimdShuffleVariableNode(type, op1, op2, simdBaseJitType, simdSize, isShuffleNative);
+ }
+ if (hasIdentityShuffle)
+ {
+ // We have 0 1 2 ... for our indices, so just return op1
+ return op1;
+ }
+ if (allOutOfRange)
+ {
+ // allOutOfRange represents indices that are always "out of range" which means zero should be
+ // selected for every element. We can special-case this down to just returning a zero node
+ return gtWrapWithSideEffects(gtNewZeroConNode(type), op1, GTF_ALL_EFFECT);
+ }
+
+ if (op2->IsVectorZero())
+ {
+ // TODO-XARCH-CQ: Zero represents indices that select the first element of op1 each time. We can simplify
+ // this down to basically a broadcast equivalent.
+ }
+
+ GenTree* retNode = nullptr;
+ GenTreeIntConCommon* cnsNode = nullptr;
+
+#if defined(TARGET_XARCH)
+ uint8_t control = 0;
+ bool crossLane = false;
+ bool needsZero = varTypeIsSmall(simdBaseType) && (simdSize <= 16);
+ bool differsByLane = false;
+ uint64_t value = 0;
+ simd_t vecCns = {};
+ simd_t mskCns = {};
+
+ for (size_t index = 0; index < elementCount; index++)
+ {
+ value = op2->GetIntegralVectorConstElement(index, simdBaseType);
+
+ if (value < elementCount)
+ {
+ // Most of the 256-bit shuffle/permute instructions operate as if
+ // the inputs were 2x 128-bit values. If the selected indices cross
+ // the respective 128-bit "lane" we may need to specialize the codegen.
+ // Also, for Vector512: If we don't cross 128-bit lanes, then we can emit vpshufb
+ // instead of vperm* - which has lower latency & allows zeroing in 1 step.
+ // We also do the same for Vector256.
+
+ crossLane |= ((((uint64_t)index ^ value) * elementSize) & ~(uint64_t)15) != 0;
+
+ // Setting the control for byte/sbyte and short/ushort is unnecessary
+ // and will actually compute an incorrect control word. But it simplifies
+ // the overall logic needed here and will remain unused.
+
+ control |= (value << (index * (elementCount / 2)));
+
+ // When Ssse3 is supported, we may need vecCns to accurately select the relevant
+ // bytes if some index is outside the valid range. Since x86/x64 is little-endian
+ // we can simplify this down to a for loop that scales the value and selects count
+ // sequential bytes.
+
+ for (uint32_t i = 0; i < elementSize; i++)
+ {
+ vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
+
+ // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet
+ // so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
+
+ mskCns.u8[(index * elementSize) + i] = 0xFF;
+ }
+ }
+ else
+ {
+ needsZero = true;
+
+ // When Ssse3 is supported, we may need vecCns to accurately select the relevant
+ // bytes if some index is outside the valid range. We can do this by just zeroing
+ // out each byte in the element. This only requires the most significant bit to be
+ // set, but we use 0xFF instead since that will be the equivalent of AllBitsSet
+
+ for (uint32_t i = 0; i < elementSize; i++)
+ {
+ vecCns.u8[(index * elementSize) + i] = 0xFF;
+
+ // When Ssse3 is not supported, we need to adjust the constant to be Zero
+ // so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
+
+ mskCns.u8[(index * elementSize) + i] = 0x00;
+ }
+ }
+
+ // Check if the value differs in this lane vs any other lane (note: lane is 128 bits, or 16 bytes)
+ if (index * elementSize >= 16)
+ {
+ // Check if the element, masked to the lane, is the same as the element in the same position of earlier
+ // lanes. If it differs, differsByLane will be set to true. We just compare to the first lane, as we already
+ // compared it to any other in between lanes.
+ differsByLane |= ((vecCns.u8[index * elementSize] ^ vecCns.u8[(index * elementSize) & 15]) & 15) != 0;
+ }
+ }
+
+ if (simdSize == 32)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+ bool isV512Supported = false;
+ if ((varTypeIsByte(simdBaseType) &&
+ (!compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512VBMI_VL))) ||
+ (varTypeIsShort(simdBaseType) &&
+ (!compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL))) ||
+ // This condition is the condition for when we'd have to emit something slower than what we can do with
+ // NI_AVX2_Shuffle directly:
+ ((!crossLane) && (needsZero || (elementSize < 4) || ((elementSize == 4) && differsByLane))))
+ {
+ // we want to treat our type like byte here
+ simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
+
+ uint8_t leftWants = 0; // result left lane wants which lanes bitfield (1 - left, 2 - right)
+ uint8_t rightWants = 0; // result right lane wants which lanes bitfield (1 - left, 2 - right)
+ bool nonDefaultShuffleMask =
+ false; // tracks whether any element in vecCns is not the default value: 0->15, 0->15
+
+ simd_t selCns = {};
+ for (size_t index = 0; index < simdSize; index++)
+ {
+ // get pointer to our leftWants/rightWants
+ uint8_t* wants = (index < 16) ? (&leftWants) : (&rightWants);
+
+ // update our wants based on which values we use
+ value = vecCns.u8[index];
+ if (value < 16)
+ {
+ *wants |= 1;
+ }
+ else if (value < 32)
+ {
+ *wants |= 2;
+ }
+
+ // update our conditional select mask for if we need 2 shuffles
+ value ^= static_cast<uint64_t>(index & 0x10);
+ selCns.u8[index] = ((value < 32) && (value >= 16)) ? 0xFF : 0;
+
+ // normalise our shuffle mask, and check if it's default
+ if (vecCns.u8[index] < 32)
+ {
+ vecCns.u8[index] &= 0x0F;
+ }
+ if (vecCns.u8[index] != (index & 0x0F))
+ {
+ nonDefaultShuffleMask = true;
+ }
+ }
+
+ // we might be able to get away with only 1 shuffle, this is the case if neither leftWants nor
+ // rightWants are 3 (indicating only 0/1 side used)
+ if ((leftWants != 3) && (rightWants != 3))
+ {
+ // set result to its initial value
+ retNode = op1;
+
+ // get the permutation control
+ uint8_t control = 0;
+ if (leftWants == 2)
+ {
+ // if left wants right lane, then set that bit
+ control |= 1;
+ }
+ if (rightWants != 1)
+ {
+ // if right wants right lane (or neither), then set the bit for right lane
+ control |= 16;
+ }
+
+ // create the permutation node
+ // if we have 16, then we don't need to actually permute, since that's what we start with
+ if (control != 16)
+ {
+ GenTree* retNodeDup = fgMakeMultiUse(&retNode);
+
+ cnsNode = gtNewIconNode(control);
+ retNode = gtNewSimdHWIntrinsicNode(type, retNode, retNodeDup, cnsNode, NI_AVX2_Permute2x128,
+ simdBaseJitType, simdSize);
+ }
+
+ // if we have a non-default shuffle mask, we need to do Avx2.Shuffle
+ if (nonDefaultShuffleMask)
+ {
+ op2 = gtNewVconNode(type);
+ op2->AsVecCon()->gtSimdVal = vecCns;
+
+ retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+ }
+ }
+ else
+ {
+ GenTree* op1Dup1 = fgMakeMultiUse(&op1);
+ GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
+
+ // create the control for swapping
+ uint8_t control = 1; // 0b00000001
+ cnsNode = gtNewIconNode(control);
+ GenTree* swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128,
+ simdBaseJitType, simdSize);
+
+ // if we have non-default shuffle mask
+ if (nonDefaultShuffleMask)
+ {
+ // create the shuffle indices node
+ op2 = gtNewVconNode(type);
+ op2->AsVecCon()->gtSimdVal = vecCns;
+
+ GenTree* op2Dup = fgMakeMultiUse(&op2);
+
+ // shuffle both op1 and swap(op1)
+ op1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+ swap = gtNewSimdHWIntrinsicNode(type, swap, op2Dup, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+ }
+
+ // select the appropriate values
+ GenTree* selNode = gtNewVconNode(type);
+ selNode->AsVecCon()->gtSimdVal = selCns;
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, swap, selNode, NI_AVX2_BlendVariable, simdBaseJitType,
+ simdSize);
+ }
+
+ assert(retNode != nullptr);
+ return retNode;
+ }
+
+ if (elementSize == 4)
+ {
+ // try to use vpshufd/vshufps instead of vpermd/vpermps.
+ if ((!crossLane) && (!differsByLane))
+ {
+ assert(!needsZero);
+ unsigned immediate = (unsigned)0;
+ for (size_t i = 0; i < 4; i++)
+ {
+ value = op2->GetIntegralVectorConstElement(i, simdBaseType);
+ immediate |= static_cast<unsigned>((value & (uint64_t)3) << (i * 2));
+ }
+ if (varTypeIsFloating(simdBaseType))
+ {
+ op2 = gtNewIconNode(immediate);
+ GenTree* op1Copy = fgMakeMultiUse(&op1);
+ return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX_Shuffle, simdBaseJitType, simdSize);
+ }
+ else
+ {
+ op2 = gtNewIconNode(immediate);
+ return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize);
+ }
+ }
+
+ // otherwise, use vpermd/vpermps.
+ else
+ {
+ for (uint32_t i = 0; i < elementCount; i++)
+ {
+ vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize);
+ }
+
+ op2 = gtNewVconNode(type);
+ op2->AsVecCon()->gtSimdVal = vecCns;
+
+ // swap the operands to match the encoding requirements
+ retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize);
+ }
}
else if (elementSize == 2)
{
+ assert(crossLane);
assert(canUseEvexEncodingDebugOnly());
for (uint32_t i = 0; i < elementCount; i++)
{
@@ -25651,6 +26414,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(
}
else if (elementSize == 1)
{
+ assert(crossLane);
assert(IsAvx10OrIsaSupportedDebugOnly(InstructionSet_AVX512VBMI_VL));
op2 = gtNewVconNode(type);
op2->AsVecCon()->gtSimdVal = vecCns;
@@ -25665,14 +26429,80 @@ GenTree* Compiler::gtNewSimdShuffleNode(
{
assert(elementSize == 8);
- cnsNode = gtNewIconNode(control);
- retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_AVX2_Permute4x64, simdBaseJitType, simdSize);
+ // try to use vshufpd instead of vpermpd.
+ if (!crossLane)
+ {
+ assert(!needsZero);
+ unsigned immediate = (unsigned)0;
+ for (size_t i = 0; i < elementCount; i++)
+ {
+ value = op2->GetIntegralVectorConstElement(i, simdBaseType);
+ immediate |= static_cast<unsigned>((value & (uint64_t)1) << i);
+ }
+ op2 = gtNewIconNode(immediate);
+ GenTree* op1Copy = fgMakeMultiUse(&op1);
+ return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX_Shuffle, CORINFO_TYPE_DOUBLE, simdSize);
+ }
+
+ // otherwise, use vpermpd.
+ else
+ {
+ cnsNode = gtNewIconNode(control);
+ retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_AVX2_Permute4x64, simdBaseJitType, simdSize);
+ }
}
}
else if (simdSize == 64)
{
assert(IsBaselineVector512IsaSupportedDebugOnly());
- if (elementSize == 4)
+ if (!crossLane)
+ {
+ // if element size is 64-bit, try to use vshufpd instead of vpshufb.
+ if ((elementSize == 8) && (!needsZero))
+ {
+ unsigned immediate = (unsigned)0;
+ for (size_t i = 0; i < elementCount; i++)
+ {
+ value = op2->GetIntegralVectorConstElement(i, simdBaseType);
+ immediate |= static_cast<unsigned>((value & (uint64_t)1) << i);
+ }
+ op2 = gtNewIconNode(immediate);
+ GenTree* op1Copy = fgMakeMultiUse(&op1);
+ return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512F_Shuffle, CORINFO_TYPE_DOUBLE,
+ simdSize);
+ }
+
+ // if the element size is 32-bit, try to use vpshufd/vshufps instead of vpshufb,
+ // if the indices (when masked to within the lane) are the same for every lane.
+ if ((elementSize == 4) && (!needsZero) && (!differsByLane))
+ {
+ unsigned immediate = (unsigned)0;
+ for (size_t i = 0; i < 4; i++)
+ {
+ value = op2->GetIntegralVectorConstElement(i, simdBaseType);
+ immediate |= static_cast<unsigned>((value & (uint64_t)3) << (i * 2));
+ }
+ if (varTypeIsFloating(simdBaseType))
+ {
+ op2 = gtNewIconNode(immediate);
+ GenTree* op1Copy = fgMakeMultiUse(&op1);
+ return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512F_Shuffle, simdBaseJitType,
+ simdSize);
+ }
+ else
+ {
+ op2 = gtNewIconNode(immediate);
+ return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_Shuffle, simdBaseJitType, simdSize);
+ }
+ }
+
+ op2 = gtNewVconNode(type);
+ op2->AsVecCon()->gtSimdVal = vecCns;
+
+ simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
+ return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512BW_Shuffle, simdBaseJitType, simdSize);
+ }
+ else if (elementSize == 4)
{
for (uint32_t i = 0; i < elementCount; i++)
{
@@ -25780,7 +26610,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(
if (needsZero)
{
- assert((simdSize == 32) || !compIsaSupportedDebugOnly(InstructionSet_SSSE3));
+ assert((simdSize == 32) || (!compIsaSupportedDebugOnly(InstructionSet_SSSE3)));
op2 = gtNewVconNode(type);
op2->AsVecCon()->gtSimdVal = mskCns;
@@ -25792,18 +26622,6 @@ GenTree* Compiler::gtNewSimdShuffleNode(
uint64_t value = 0;
simd_t vecCns = {};
- if (simdSize == 16)
- {
- // Vector128.Shuffle(a, Vector128.Create(2, 3, 0, 1)) -> ExtractVector128(v.AsUInt64(), v.AsUInt64(), 1)
- if ((op2->GetIntegralVectorConstElement(0, TYP_ULONG) == 0x300000002) &&
- (op2->GetIntegralVectorConstElement(1, TYP_ULONG) == 0x100000000))
- {
- GenTree* op1Clone = fgMakeMultiUse(&op1);
- return gtNewSimdHWIntrinsicNode(type, op1, op1Clone, gtNewIconNode(1), NI_AdvSimd_ExtractVector128,
- CORINFO_TYPE_ULONG, simdSize);
- }
- }
-
for (size_t index = 0; index < elementCount; index++)
{
value = op2->GetIntegralVectorConstElement(index, simdBaseType);
@@ -25824,6 +26642,17 @@ GenTree* Compiler::gtNewSimdShuffleNode(
}
}
+ if (simdSize == 16)
+ {
+ // Vector128.Shuffle(a, Vector128.Create(2, 3, 0, 1)) -> ExtractVector128(v.AsUInt64(), v.AsUInt64(), 1)
+ if ((vecCns.u64[0] == 0x0F0E0D0C0B0A0908) && (vecCns.u64[1] == 0x0706050403020100))
+ {
+ GenTree* op1Clone = fgMakeMultiUse(&op1);
+ return gtNewSimdHWIntrinsicNode(type, op1, op1Clone, gtNewIconNode(1), NI_AdvSimd_ExtractVector128,
+ CORINFO_TYPE_ULONG, simdSize);
+ }
+ }
+
NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup;
if (simdSize == 16)
@@ -26368,18 +27197,6 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy
NamedIntrinsic intrinsic = NI_Illegal;
#ifdef TARGET_XARCH
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType))
- {
- // We need SSE41 to handle long, use software fallback
- assert(compIsaSupportedDebugOnly(InstructionSet_SSE41));
-
- // Create a GetElement node which handles decomposition
- GenTree* op2 = gtNewIconNode(0);
- return gtNewSimdGetElementNode(type, op1, op2, simdBaseJitType, simdSize);
- }
-#endif // TARGET_X86
-
if (simdSize == 64)
{
assert(IsBaselineVector512IsaSupportedDebugOnly());
@@ -27539,7 +28356,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
}
//------------------------------------------------------------------------
-// OperIsMemoryLoad: Does this HWI node have memory store semantics?
+// OperIsMemoryStore: Does this HWI node have memory store semantics?
//
// Arguments:
// pAddr - optional [out] parameter for the address
@@ -27674,7 +28491,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStoreOrBarrier() const
}
//------------------------------------------------------------------------
-// OperIsEmbBroadcastCompatible: Checks if the intrinsic is a embedded broadcast compatible inintrsic.
+// OperIsEmbBroadcastCompatible: Checks if the intrinsic is a embedded broadcast compatible intrinsic.
//
// Return Value:
// true if the intrinsic node lowering instruction is embedded broadcast compatible.
@@ -27740,37 +28557,6 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const
#endif
}
-//------------------------------------------------------------------------
-// OperIsCreateScalarUnsafe: Is this HWIntrinsic a CreateScalarUnsafe node.
-//
-// Return Value:
-// Whether "this" is a CreateScalarUnsafe node.
-//
-bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const
-{
- NamedIntrinsic intrinsicId = GetHWIntrinsicId();
-
- switch (intrinsicId)
- {
-#if defined(TARGET_ARM64)
- case NI_Vector64_CreateScalarUnsafe:
-#endif // TARGET_ARM64
- case NI_Vector128_CreateScalarUnsafe:
-#if defined(TARGET_XARCH)
- case NI_Vector256_CreateScalarUnsafe:
- case NI_Vector512_CreateScalarUnsafe:
-#endif // TARGET_XARCH
- {
- return true;
- }
-
- default:
- {
- return false;
- }
- }
-}
-
//------------------------------------------------------------------------
// OperIsBitwiseHWIntrinsic: Is the operation a bitwise logic operation.
//
@@ -30173,15 +30959,24 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec
#endif // TARGET_XARCH
case NI_Vector128_Shuffle:
+ case NI_Vector128_ShuffleNative:
+ case NI_Vector128_ShuffleNativeFallback:
#if defined(TARGET_XARCH)
case NI_Vector256_Shuffle:
+ case NI_Vector256_ShuffleNative:
+ case NI_Vector256_ShuffleNativeFallback:
case NI_Vector512_Shuffle:
+ case NI_Vector512_ShuffleNative:
+ case NI_Vector512_ShuffleNativeFallback:
#elif defined(TARGET_ARM64)
case NI_Vector64_Shuffle:
+ case NI_Vector64_ShuffleNative:
+ case NI_Vector64_ShuffleNativeFallback:
#endif
{
- // The shuffle indices need to be constant so we can preserve
- // the node as a hwintrinsic instead of rewriting as a user call.
+ // The shuffle indices are ideally constant so we can get the best
+ // codegen possible. There are also some cases where the node would
+ // have to be rewritten as a user call, depending on the available intrinsics.
assert(GetOperandCount() == 2);
return IsUserCall() && (operand == Op(2));
}
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 6af651b6860882..9f22ca88372380 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -6434,7 +6434,6 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
bool OperIsMemoryStoreOrBarrier() const;
bool OperIsEmbBroadcastCompatible() const;
bool OperIsBroadcastScalar() const;
- bool OperIsCreateScalarUnsafe() const;
bool OperIsBitwiseHWIntrinsic() const;
bool OperIsEmbRoundingEnabled() const;
@@ -6788,26 +6787,25 @@ struct GenTreeVecCon : public GenTree
case TYP_LONG:
case TYP_ULONG:
{
-#if defined(TARGET_64BIT)
- if (arg->IsCnsIntOrI())
+ if (arg->IsIntegralConst())
{
- simdVal.i64[argIdx] = static_cast<int64_t>(arg->AsIntCon()->gtIconVal);
+ simdVal.i64[argIdx] = arg->AsIntConCommon()->IntegralValue();
return true;
}
-#else
- if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI())
+#if !defined(TARGET_64BIT)
+ else if (arg->OperIsLong() && arg->gtGetOp1()->IsCnsIntOrI() && arg->gtGetOp2()->IsCnsIntOrI())
{
- // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT
+ // 32-bit targets may decompose GT_CNS_LNG into two GT_CNS_INT
// We need to reconstruct the 64-bit value in order to handle this
- INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal;
+ INT64 gtLconVal = arg->gtGetOp2()->AsIntCon()->gtIconVal;
gtLconVal <<= 32;
- gtLconVal |= arg->AsOp()->gtOp1->AsIntCon()->gtIconVal;
+ gtLconVal |= static_cast<uint32_t>(arg->gtGetOp1()->AsIntCon()->gtIconVal);
simdVal.i64[argIdx] = gtLconVal;
return true;
}
-#endif // TARGET_64BIT
+#endif // !TARGET_64BIT
else
{
// We expect the constant to have been already zeroed
diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h
index d8bf386eb6009d..d936d579d8e25a 100644
--- a/src/coreclr/jit/hwintrinsic.h
+++ b/src/coreclr/jit/hwintrinsic.h
@@ -923,6 +923,96 @@ struct HWIntrinsicInfo
return false;
}
+ static bool IsVectorCreate(NamedIntrinsic id)
+ {
+ switch (id)
+ {
+#if defined(TARGET_ARM64)
+ case NI_Vector64_Create:
+#endif // TARGET_ARM64
+ case NI_Vector128_Create:
+#if defined(TARGET_XARCH)
+ case NI_Vector256_Create:
+ case NI_Vector512_Create:
+#endif // TARGET_XARCH
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static bool IsVectorCreateScalar(NamedIntrinsic id)
+ {
+ switch (id)
+ {
+#if defined(TARGET_ARM64)
+ case NI_Vector64_CreateScalar:
+#endif // TARGET_ARM64
+ case NI_Vector128_CreateScalar:
+#if defined(TARGET_XARCH)
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
+#endif // TARGET_XARCH
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static bool IsVectorCreateScalarUnsafe(NamedIntrinsic id)
+ {
+ switch (id)
+ {
+#if defined(TARGET_ARM64)
+ case NI_Vector64_CreateScalarUnsafe:
+#endif // TARGET_ARM64
+ case NI_Vector128_CreateScalarUnsafe:
+#if defined(TARGET_XARCH)
+ case NI_Vector256_CreateScalarUnsafe:
+ case NI_Vector512_CreateScalarUnsafe:
+#endif // TARGET_XARCH
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static bool IsVectorGetElement(NamedIntrinsic id)
+ {
+ switch (id)
+ {
+#if defined(TARGET_ARM64)
+ case NI_Vector64_GetElement:
+#endif // TARGET_ARM64
+ case NI_Vector128_GetElement:
+#if defined(TARGET_XARCH)
+ case NI_Vector256_GetElement:
+ case NI_Vector512_GetElement:
+#endif // TARGET_XARCH
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static bool IsVectorToScalar(NamedIntrinsic id)
+ {
+ switch (id)
+ {
+#if defined(TARGET_ARM64)
+ case NI_Vector64_ToScalar:
+#endif // TARGET_ARM64
+ case NI_Vector128_ToScalar:
+#if defined(TARGET_XARCH)
+ case NI_Vector256_ToScalar:
+ case NI_Vector512_ToScalar:
+#endif // TARGET_XARCH
+ return true;
+ default:
+ return false;
+ }
+ }
+
static bool HasImmediateOperand(NamedIntrinsic id)
{
#if defined(TARGET_ARM64)
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index be1c577d4bdfaa..a9c50c029cc22f 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2251,38 +2251,56 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector64_Shuffle:
case NI_Vector128_Shuffle:
+ case NI_Vector64_ShuffleNative:
+ case NI_Vector128_ShuffleNative:
+ case NI_Vector64_ShuffleNativeFallback:
+ case NI_Vector128_ShuffleNativeFallback:
{
assert((sig->numArgs == 2) || (sig->numArgs == 3));
assert((simdSize == 8) || (simdSize == 16));
+ // The Native variants are non-deterministic on arm64 (for element size > 1)
+ bool isShuffleNative = (intrinsic != NI_Vector64_Shuffle) && (intrinsic != NI_Vector128_Shuffle);
+ if (isShuffleNative && (genTypeSize(simdBaseType) > 1) && BlockNonDeterministicIntrinsics(mustExpand))
+ {
+ break;
+ }
+
GenTree* indices = impStackTop(0).val;
- if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType))
+ // Check if the required intrinsics to emit are available.
+ bool canBecomeValidForShuffle = false;
+ if (!IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative))
+ {
+ // All cases on arm64 are either valid or invalid, they cannot become valid later
+ assert(!canBecomeValidForShuffle);
+ break;
+ }
+
+ // If the indices might become constant later, then we don't emit for now, delay until later.
+ if (!indices->IsCnsVec())
{
assert(sig->numArgs == 2);
- if (!opts.OptimizationEnabled())
+ if (opts.OptimizationEnabled())
{
// Only enable late stage rewriting if optimizations are enabled
// as we won't otherwise encounter a constant at the later point
- return nullptr;
- }
-
- op2 = impSIMDPopStack();
- op1 = impSIMDPopStack();
+ op2 = impSIMDPopStack();
+ op1 = impSIMDPopStack();
- retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
+ retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
- retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
- break;
+ retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
+ break;
+ }
}
if (sig->numArgs == 2)
{
- op2 = impSIMDPopStack();
- op1 = impSIMDPopStack();
-
- retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize);
+ op2 = impSIMDPopStack();
+ op1 = impSIMDPopStack();
+ retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isShuffleNative);
}
break;
}
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 4a73297d3d1e18..19792a61c4083e 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -168,6 +168,226 @@ static insOpts AddEmbMaskingMode(insOpts instOptions, regNumber maskReg, bool me
return static_cast<insOpts>(result);
}
+//------------------------------------------------------------------------
+// GetImmediateMaxAndMask: Returns the max valid value and a bit mask for
+// a full-range immediate of an instruction that has documented
+// masking or clamping of the immediate.
+//
+// Arguments:
+// instruction - The instruction to look up
+// simdSize - The vector size for the instruction
+// maskOut - A pointer to the location to return the mask
+//
+// Return Value:
+// The max useful immediate value
+//
+static unsigned GetImmediateMaxAndMask(instruction ins, unsigned simdSize, unsigned* maskOut)
+{
+ assert(maskOut != nullptr);
+ assert((simdSize >= 16) && (simdSize <= 64));
+
+ unsigned lanes = simdSize / genTypeSize(TYP_SIMD16);
+ unsigned mask = 0xFF;
+ unsigned max = 0;
+
+ switch (ins)
+ {
+ // These byte-wise shift instructions are documented to return a zero vector
+ // for shift amounts 16 or greater.
+ case INS_pslldq:
+ case INS_psrldq:
+ {
+ max = 16;
+ break;
+ }
+
+ // palignr concatenates two 16-byte lanes and shifts the result by imm8 bytes.
+ // It is documented to return a zero vector for shift amounts 32 or greater.
+ case INS_palignr:
+ {
+ max = 32;
+ break;
+ }
+
+ // The following groups of instructions extract/insert a scalar value from/to a
+ // 128-bit vector and use a documented range of bits for element index.
+ case INS_pextrq:
+ case INS_pinsrq:
+ {
+ mask = 0b00000001;
+ max = mask;
+ break;
+ }
+
+ case INS_extractps:
+ case INS_pextrd:
+ case INS_pinsrd:
+ {
+ mask = 0b00000011;
+ max = mask;
+ break;
+ }
+
+ case INS_pextrw:
+ case INS_pinsrw:
+ {
+ mask = 0b00000111;
+ max = mask;
+ break;
+ }
+
+ case INS_pextrb:
+ case INS_pinsrb:
+ {
+ mask = 0b00001111;
+ max = mask;
+ break;
+ }
+
+ // The following instructions concatenate 128- or 256-bit vectors and shift the
+ // result right by imm8 elements. The number of bits used depends on the
+ // vector size / element size.
+ case INS_valignd:
+ {
+ mask = (simdSize / genTypeSize(TYP_INT)) - 1;
+ max = mask;
+ break;
+ }
+
+ case INS_valignq:
+ {
+ mask = (simdSize / genTypeSize(TYP_LONG)) - 1;
+ max = mask;
+ break;
+ }
+
+ // The following groups of instructions operate in 128-bit lanes but use a
+ // different range of bits from the immediate for each lane.
+ case INS_blendpd:
+ case INS_shufpd:
+ case INS_vpermilpd:
+ {
+ assert(lanes <= 4);
+
+ // two bits per lane
+ mask = (1 << (lanes * 2)) - 1;
+ max = mask;
+ break;
+ }
+
+ case INS_blendps:
+ case INS_vpblendd:
+ {
+ assert(lanes <= 2);
+
+ // four bits per lane
+ mask = (1 << (lanes * 4)) - 1;
+ max = mask;
+ break;
+ }
+
+ case INS_mpsadbw:
+ {
+ assert(lanes <= 2);
+
+ // three bits per lane
+ mask = (1 << (lanes * 3)) - 1;
+ max = mask;
+ break;
+ }
+
+ // These instructions extract/insert a 128-bit vector from/to either a 256-bit or
+ // 512-bit vector. The number of positions is equal to the number of 128-bit lanes.
+ case INS_vextractf128:
+ case INS_vextracti128:
+ case INS_vextractf64x2:
+ case INS_vextracti64x2:
+ case INS_vinsertf128:
+ case INS_vinserti128:
+ case INS_vinsertf64x2:
+ case INS_vinserti64x2:
+ {
+ assert(lanes >= 2);
+
+ mask = lanes - 1;
+ max = mask;
+ break;
+ }
+
+ // These instructions shuffle 128-bit lanes within a larger vector.
+ // The number of bits used depends on the number of possible lanes.
+ case INS_vshuff32x4:
+ case INS_vshufi32x4:
+ case INS_vshuff64x2:
+ case INS_vshufi64x2:
+ {
+ assert(lanes >= 2);
+
+ // log2(lanes) bits per lane for src selection
+ mask = (1 << (lanes * BitOperations::Log2(lanes))) - 1;
+ max = mask;
+ break;
+ }
+
+ // These instructions extract/insert a 256-bit vector from/to a 512-bit vector
+ // and therefore only have two possible positions.
+ case INS_vextractf32x8:
+ case INS_vextracti32x8:
+ case INS_vextractf64x4:
+ case INS_vextracti64x4:
+ case INS_vinsertf32x8:
+ case INS_vinserti32x8:
+ case INS_vinsertf64x4:
+ case INS_vinserti64x4:
+ {
+ assert(simdSize == 64);
+
+ mask = 0b00000001;
+ max = mask;
+ break;
+ }
+
+ // The following instructions use documented ranges of bits with gaps in them.
+ case INS_dppd:
+ {
+ // bits [1:0] are the result broadcast mask
+ // bits [5:4] are the element selection mask
+ mask = 0b00110011;
+ max = mask;
+ break;
+ }
+
+ case INS_pclmulqdq:
+ {
+ // bit 0 selects the src1 qword
+ // bit 4 selects the src2 qword
+ mask = 0b00010001;
+ max = mask;
+ break;
+ }
+
+ case INS_vperm2f128:
+ case INS_vperm2i128:
+ {
+ // bits [1:0] select the src index for the low lane result
+ // bits [5:4] select the src index for the high lane result
+ // bits 3 and 7, if set, will zero the low or high lane, respectively
+ mask = 0b10111011;
+ max = mask;
+ break;
+ }
+
+ default:
+ {
+ max = 255;
+ break;
+ }
+ }
+
+ *maskOut = mask;
+ return max;
+}
+
//------------------------------------------------------------------------
// genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
@@ -332,8 +552,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
};
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg,
- emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, lastOp->GetRegNum(), baseReg,
+ offsReg, emitSwCase);
break;
}
case 2:
@@ -344,8 +564,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
};
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg,
- emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, lastOp->GetRegNum(), baseReg,
+ offsReg, emitSwCase);
break;
}
@@ -533,7 +753,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// constant value.
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, op2Reg, baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, op2Reg, baseReg, offsReg,
+ emitSwCase);
}
}
else if (node->TypeGet() == TYP_VOID)
@@ -583,7 +804,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// can also occur if the consumer calls it directly and just doesn't pass a constant value.
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, op3Reg, baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, op3Reg, baseReg, offsReg,
+ emitSwCase);
}
}
else if (category == HW_Category_MemoryStore)
@@ -681,7 +903,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// can also occur if the consumer calls it directly and just doesn't pass a constant value.
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, op4Reg, baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, simdSize, op4Reg, baseReg, offsReg,
+ emitSwCase);
}
}
else
@@ -857,7 +1080,7 @@ void CodeGen::genHWIntrinsic_R_RM(
if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (rmOpDesc.GetKind() == OperandKind::Reg))
{
- // As embedded rounding only appies in R_R case, we can skip other checks for different paths.
+ // As embedded rounding only applies in R_R case, we can skip other checks for different paths.
regNumber op1Reg = rmOp->GetRegNum();
assert(op1Reg != REG_NA);
@@ -948,7 +1171,7 @@ void CodeGen::genHWIntrinsic_R_RM(
// that failed and we either didn't get marked regOptional or we did and didn't get spilled
//
// As such, we need to emulate the removed CreateScalarUnsafe to ensure that op1 is in a
- // SIMD register so the broadcast instruction can execute succesfully. We'll just move
+ // SIMD register so the broadcast instruction can execute successfully. We'll just move
// the value into the target register and then broadcast it out from that.
emitAttr movdAttr = emitActualTypeSize(node->GetSimdBaseType());
@@ -1212,7 +1435,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins,
if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (op3Desc.GetKind() == OperandKind::Reg))
{
- // As embedded rounding only appies in R_R case, we can skip other checks for different paths.
+ // As embedded rounding only applies in R_R case, we can skip other checks for different paths.
regNumber op3Reg = op3->GetRegNum();
assert(op3Reg != REG_NA);
@@ -1368,6 +1591,8 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I(
//
// Arguments:
// intrinsic - intrinsic ID
+// ins - the instruction chosen for the intrinsic and base type
+// attr - the emit attributes for the instruction
// nonConstImmReg - the register contains non-constant imm8 argument
// baseReg - a register for the start of the switch table
// offsReg - a register for the offset into the switch table
@@ -1382,6 +1607,8 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I(
//
template
void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic,
+ instruction ins,
+ emitAttr attr,
regNumber nonConstImmReg,
regNumber baseReg,
regNumber offsReg,
@@ -1389,18 +1616,44 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsi
{
assert(nonConstImmReg != REG_NA);
// AVX2 Gather intrinsics use managed non-const fallback since they have discrete imm8 value range
- // that does work with the current compiler generated jump-table fallback
+ // that does not work with the current compiler generated jump-table fallback
assert(!HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic));
emitter* emit = GetEmitter();
- const unsigned maxByte = (unsigned)HWIntrinsicInfo::lookupImmUpperBound(intrinsic) + 1;
- assert(maxByte <= 256);
- BasicBlock* jmpTable[256];
+ unsigned maxByte = (unsigned)HWIntrinsicInfo::lookupImmUpperBound(intrinsic);
+ unsigned mask = 0xFF;
+
+ // Some instructions allow full-range immediates but are documented to ignore ranges of bits
+ // or to clamp the value. We can implement the same masking/clamping here in order to reduce
+ // the size of the generated code and jump table.
+
+ if (HWIntrinsicInfo::HasFullRangeImm(intrinsic))
+ {
+ maxByte = GetImmediateMaxAndMask(ins, EA_SIZE(attr), &mask);
+
+ if (mask != 0xFF)
+ {
+ emit->emitIns_R_I(INS_and, EA_4BYTE, nonConstImmReg, mask);
+ }
+ else if (maxByte < 255)
+ {
+ emit->emitIns_R_I(INS_cmp, EA_4BYTE, nonConstImmReg, maxByte);
- unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte, true);
+ BasicBlock* skipLabel = genCreateTempLabel();
+ inst_JMP(EJ_jbe, skipLabel);
+
+ instGen_Set_Reg_To_Imm(EA_4BYTE, nonConstImmReg, maxByte);
+
+ genDefineTempLabel(skipLabel);
+ }
+ }
+
+ assert(maxByte <= 255);
+ BasicBlock* jmpTable[256];
+ unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte + 1, true);
// Emit the jump table
- for (unsigned i = 0; i < maxByte; i++)
+ for (unsigned i = 0; i <= maxByte; i++)
{
jmpTable[i] = genCreateTempLabel();
emit->emitDataGenData(i, jmpTable[i]);
@@ -1423,9 +1676,18 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsi
genDefineTempLabel(switchTableBeg);
- for (unsigned i = 0; i < maxByte; i++)
+ for (unsigned i = 0; i <= maxByte; i++)
{
genDefineTempLabel(jmpTable[i]);
+
+ if ((i & mask) != i)
+ {
+ // This is a jump table entry that won't be hit, because the value can't exist after
+ // masking. We define the labels for these values in order to pad out the jump table
+ // so that the valid entries fall at the correct offsets, but we don't emit any code.
+ continue;
+ }
+
emitSwCase((int8_t)i);
emit->emitIns_J(INS_jmp, switchTableEnd);
}
@@ -1463,7 +1725,7 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic*
};
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
break;
}
@@ -1488,7 +1750,7 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic*
};
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
break;
}
@@ -1504,7 +1766,7 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic*
};
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
break;
}
@@ -1540,7 +1802,7 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic*
};
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, attr, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
break;
}
@@ -1581,13 +1843,67 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
switch (intrinsicId)
{
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
if (varTypeIsIntegral(baseType))
{
- genHWIntrinsic_R_RM(node, ins, emitActualTypeSize(baseType), targetReg, op1, instOptions);
+ emitAttr baseAttr = emitActualTypeSize(baseType);
+
+#if defined(TARGET_X86)
+ if (varTypeIsLong(baseType))
+ {
+ assert(op1->isContained());
+
+ if (op1->OperIsLong())
+ {
+ node->SetSimdBaseJitType(CORINFO_TYPE_INT);
+
+ bool canCombineLoad = false;
+ GenTree* loPart = op1->gtGetOp1();
+ GenTree* hiPart = op1->gtGetOp2();
+
+ if ((loPart->isContained() && hiPart->isContained()) &&
+ (loPart->OperIs(GT_LCL_FLD) && hiPart->OperIs(GT_LCL_FLD)))
+ {
+ GenTreeLclFld* loFld = loPart->AsLclFld();
+ GenTreeLclFld* hiFld = hiPart->AsLclFld();
+
+ canCombineLoad = (hiFld->GetLclNum() == loFld->GetLclNum()) &&
+ (hiFld->GetLclOffs() == (loFld->GetLclOffs() + 4));
+ }
+
+ if (!canCombineLoad)
+ {
+ if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions);
+ inst_RV_RV_TT_IV(INS_pinsrd, EA_16BYTE, targetReg, targetReg, hiPart, 0x01,
+ !compiler->canUseVexEncoding(), instOptions);
+ }
+ else
+ {
+ regNumber tmpReg = internalRegisters.GetSingle(node);
+ genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions);
+ genHWIntrinsic_R_RM(node, ins, baseAttr, tmpReg, hiPart, instOptions);
+ emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, targetReg, tmpReg, instOptions);
+ }
+ break;
+ }
+
+ op1 = loPart;
+ }
+
+ ins = INS_movq;
+ baseAttr = EA_8BYTE;
+ }
+#endif // TARGET_X86
+
+ genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, op1, instOptions);
}
else
{
@@ -1602,6 +1918,45 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
else
{
assert(instOptions == INS_OPTS_NONE);
+
+ if (HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId))
+ {
+ // If this is CreateScalar, we need to ensure the upper elements are zeroed.
+ // Scalar integer loads and loads from memory always zero the upper elements,
+ // so reg to reg copies of floating types are the only place we need to
+ // do anything different.
+
+ if (baseType == TYP_FLOAT)
+ {
+ if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ // insertps imm8 is:
+ // * Bits 0-3: zmask
+ // * Bits 4-5: count_d
+ // * Bits 6-7: count_s (register form only)
+ //
+ // We want zmask 0b1110 (0xE) to zero elements 1/2/3
+ // We want count_d 0b00 (0x0) to insert the value to element 0
+ // We want count_s 0b00 (0x0) as we're just taking element 0 of the source
+
+ emit->emitIns_SIMD_R_R_R_I(INS_insertps, attr, targetReg, targetReg, op1Reg, 0x0E,
+ instOptions);
+ }
+ else
+ {
+ assert(targetReg != op1Reg);
+ emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg, instOptions);
+ emit->emitIns_Mov(INS_movss, attr, targetReg, op1Reg, /* canSkip */ false);
+ }
+ }
+ else
+ {
+ // `movq xmm xmm` zeroes the upper 64 bits.
+ genHWIntrinsic_R_RM(node, INS_movq, attr, targetReg, op1, instOptions);
+ }
+ break;
+ }
+
// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
}
@@ -1783,6 +2138,20 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
}
genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
}
+ else if (varTypeIsIntegral(baseType))
+ {
+ assert(!varTypeIsLong(baseType) || TargetArchitecture::Is64Bit);
+ assert(HWIntrinsicInfo::IsVectorToScalar(intrinsicId));
+
+ attr = emitActualTypeSize(baseType);
+ genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
+
+ if (varTypeIsSmall(baseType))
+ {
+ emit->emitIns_Mov(ins_Move_Extend(baseType, /* srcInReg */ true), emitTypeSize(baseType), targetReg,
+ targetReg, /* canSkip */ false);
+ }
+ }
else
{
assert(varTypeIsFloating(baseType));
@@ -2251,7 +2620,8 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
// can also occur if the consumer calls it directly and just doesn't pass a constant value.
regNumber baseReg = internalRegisters.Extract(node);
regNumber offsReg = internalRegisters.GetSingle(node);
- genHWIntrinsicJumpTableFallback(intrinsicId, op2->GetRegNum(), baseReg, offsReg, emitSwCase);
+ genHWIntrinsicJumpTableFallback(intrinsicId, ins, EA_16BYTE, op2->GetRegNum(), baseReg, offsReg,
+ emitSwCase);
}
break;
}
@@ -2297,15 +2667,47 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
assert(!op2->isUsedFromReg() || (op2->GetRegNum() != targetReg) || (op1Reg == targetReg));
emit->emitIns_Mov(INS_mov, emitTypeSize(targetType), targetReg, op1Reg, /* canSkip */ true);
+ instruction ins = INS_crc32;
+#ifdef TARGET_AMD64
+ bool needsEvex = false;
+ if (emit->IsExtendedGPReg(targetReg))
+ {
+ needsEvex = true;
+ }
+ else if (op2->isUsedFromReg() && emit->IsExtendedGPReg(op2->GetRegNum()))
+ {
+ needsEvex = true;
+ }
+ else if (op2->isIndir())
+ {
+ GenTreeIndir* indir = op2->AsIndir();
+
+ // We don't need to check if they are actually enregistered.
+ if (indir->HasBase() && emit->IsExtendedGPReg(indir->Base()->GetRegNum()))
+ {
+ needsEvex = true;
+ }
+
+ if (indir->HasIndex() && emit->IsExtendedGPReg(indir->Index()->GetRegNum()))
+ {
+ needsEvex = true;
+ }
+ }
+
+ if (needsEvex)
+ {
+ ins = INS_crc32_apx;
+ }
+#endif // TARGET_AMD64
if ((baseType == TYP_UBYTE) || (baseType == TYP_USHORT)) // baseType is the type of the second argument
{
assert(targetType == TYP_INT);
- genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(baseType), targetReg, op2, instOptions);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, op2, instOptions);
}
else
{
assert((targetType == TYP_INT) || (targetType == TYP_LONG));
- genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(targetType), targetReg, op2, instOptions);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op2, instOptions);
}
break;
diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h
index efad7ee0980253..bcd837f48ec750 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64.h
@@ -91,6 +91,8 @@ HARDWARE_INTRINSIC(Vector64, Narrow,
HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector64, ShuffleNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector64, ShuffleNativeFallback, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -213,6 +215,8 @@ HARDWARE_INTRINSIC(Vector128, Narrow,
HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index 7d572587bbaab4..a49d4b4bdc66bf 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -62,7 +62,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native,
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
@@ -109,12 +109,14 @@ HARDWARE_INTRINSIC(Vector128, Narrow,
HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -179,7 +181,7 @@ HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native,
HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
-HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
@@ -228,12 +230,14 @@ HARDWARE_INTRINSIC(Vector256, Narrow,
HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector256, ShuffleNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector256, ShuffleNativeFallback, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, Truncate, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
@@ -299,7 +303,7 @@ HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native,
HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, Dot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
@@ -349,12 +353,14 @@ HARDWARE_INTRINSIC(Vector512, Narrow,
HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector512, ShuffleNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(Vector512, ShuffleNativeFallback, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, Truncate, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 33278184d02d29..e16951558ab85a 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -2125,16 +2125,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
if (sig->numArgs == 1)
{
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst())
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- break;
- }
-#endif // TARGET_X86
-
op1 = impPopStack().val;
retNode = gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -2266,16 +2256,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType))
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- break;
- }
-#endif // TARGET_X86
-
IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs);
// TODO-CQ: We don't handle contiguous args for anything except TYP_FLOAT today
@@ -2321,16 +2301,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 1);
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst())
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- break;
- }
-#endif // TARGET_X86
-
op1 = impPopStack().val;
retNode = gtNewSimdCreateScalarNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -2342,16 +2312,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 1);
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst())
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- break;
- }
-#endif // TARGET_X86
-
op1 = impPopStack().val;
retNode = gtNewSimdCreateScalarUnsafeNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -2376,27 +2336,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}
}
-
- if (varTypeIsLong(simdBaseType))
- {
- if (!impStackTop(0).val->OperIsConst())
- {
- // When op2 is a constant, we can skip the multiplication allowing us to always
- // generate better code. However, if it isn't then we need to fallback in the
- // cases where multiplication isn't supported.
-
- if ((simdSize != 64) && !canUseEvexEncoding())
- {
- // TODO-XARCH-CQ: We should support long/ulong multiplication
- break;
- }
- }
-
-#if defined(TARGET_X86)
- // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast
- break;
-#endif // TARGET_X86
- }
}
impSpillSideEffect(true, stackState.esStackDepth -
@@ -2462,14 +2401,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
- {
- // We need SSE41 to handle long, use software fallback
- break;
- }
-#endif // TARGET_X86
-
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();
@@ -2765,13 +2696,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case TYP_LONG:
case TYP_ULONG:
{
- bool useToScalar = op2->IsIntegralConst(0);
-
-#if defined(TARGET_X86)
- useToScalar &= !varTypeIsLong(simdBaseType);
-#endif // TARGET_X86
-
- if (!useToScalar && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ if (!op2->IsIntegralConst(0) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// Using software fallback if simdBaseType is not supported by hardware
return nullptr;
@@ -3349,15 +3274,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType))
- {
- // TODO-XARCH-CQ: We can't handle long here, only because one of the args might
- // be scalar, and gtNewSimdCreateBroadcastNode doesn't handle long on x86.
- break;
- }
-#endif // TARGET_X86
-
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
var_types argType = TYP_UNKNOWN;
@@ -3514,18 +3430,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 2);
-#if defined(TARGET_X86)
- if ((simdBaseType == TYP_LONG) || (simdBaseType == TYP_DOUBLE))
- {
- if (!compOpportunisticallyDependsOn(InstructionSet_EVEX) && !impStackTop(0).val->IsCnsIntOrI())
- {
- // If vpsraq is available, we can use that. We can also trivially emulate arithmetic shift by const
- // amount. Otherwise, more work is required for long types, so we fall back to managed for now.
- break;
- }
- }
-#endif // TARGET_X86
-
if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH;
@@ -3616,37 +3520,62 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector128_Shuffle:
case NI_Vector256_Shuffle:
case NI_Vector512_Shuffle:
+ case NI_Vector128_ShuffleNative:
+ case NI_Vector256_ShuffleNative:
+ case NI_Vector512_ShuffleNative:
+ case NI_Vector128_ShuffleNativeFallback:
+ case NI_Vector256_ShuffleNativeFallback:
+ case NI_Vector512_ShuffleNativeFallback:
{
assert((sig->numArgs == 2) || (sig->numArgs == 3));
+ // The Native variants are non-deterministic on xarch
+ bool isShuffleNative = (intrinsic != NI_Vector128_Shuffle) && (intrinsic != NI_Vector256_Shuffle) &&
+ (intrinsic != NI_Vector512_Shuffle);
+ if (isShuffleNative && BlockNonDeterministicIntrinsics(mustExpand))
+ {
+ break;
+ }
+
GenTree* indices = impStackTop(0).val;
- if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType))
+ // Check if the required intrinsics are available to emit now (validForShuffle). If we have variable
+ // indices that might become possible to emit later (due to them becoming constant), this will be
+ // indicated in canBecomeValidForShuffle; otherwise, it's just the same as validForShuffle.
+ bool canBecomeValidForShuffle = false;
+ bool validForShuffle =
+ IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative);
+
+ // If it isn't valid for shuffle (and can't become valid later), then give up now.
+ if (!canBecomeValidForShuffle)
+ {
+ return nullptr;
+ }
+
+ // If the indices might become constant later, then we don't emit for now, delay until later.
+ if ((!validForShuffle) || (!indices->IsCnsVec()))
{
assert(sig->numArgs == 2);
- if (!opts.OptimizationEnabled())
+ if (opts.OptimizationEnabled())
{
// Only enable late stage rewriting if optimizations are enabled
// as we won't otherwise encounter a constant at the later point
- return nullptr;
- }
-
- op2 = impSIMDPopStack();
- op1 = impSIMDPopStack();
+ op2 = impSIMDPopStack();
+ op1 = impSIMDPopStack();
- retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
+ retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
- retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
- break;
+ retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
+ break;
+ }
}
if (sig->numArgs == 2)
{
- op2 = impSIMDPopStack();
- op1 = impSIMDPopStack();
-
- retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize);
+ op2 = impSIMDPopStack();
+ op1 = impSIMDPopStack();
+ retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isShuffleNative);
}
break;
}
@@ -3781,14 +3710,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 1);
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
- {
- // We need SSE41 to handle long, use software fallback
- break;
- }
-#endif // TARGET_X86
-
op1 = impSIMDPopStack();
retNode = gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -3800,14 +3721,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 1);
-#if defined(TARGET_X86)
- if (varTypeIsLong(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
- {
- // We need SSE41 to handle long, use software fallback
- break;
- }
-#endif // TARGET_X86
-
op1 = impSIMDPopStack();
retNode = gtNewSimdToScalarNode(retType, op1, simdBaseJitType, simdSize);
break;
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index c4fae10ce005ab..12b0cfb2588b50 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -3861,7 +3861,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI
int simdWidth = getSIMDTypeSizeInBytes(fieldClsHnd);
if ((simdWidth > 0) && IsBaselineSimdIsaSupported())
{
- assert((totalSize <= 32) && (totalSize <= MaxStructSize));
+ assert((totalSize <= 64) && (totalSize <= MaxStructSize));
var_types simdType = getSIMDTypeForSize(simdWidth);
bool hwAccelerated = true;
@@ -4747,12 +4747,15 @@ void Compiler::impImportLeaveEHRegions(BasicBlock* block)
}
#endif
- unsigned finallyNesting = compHndBBtab[XTnum].ebdHandlerNestingLevel;
- assert(finallyNesting <= compHndBBtabCount);
+ // We now record the EH region ID on GT_END_LFIN instead of the finally nesting depth,
+ // as the later can change as we optimize the code.
+ //
+ unsigned const ehID = compHndBBtab[XTnum].ebdID;
+ assert(ehID <= impInlineRoot()->compEHID);
- GenTree* endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, finallyNesting);
- endLFinStmt = gtNewStmt(endLFin);
- endCatches = NULL;
+ GenTree* const endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, ehID);
+ endLFinStmt = gtNewStmt(endLFin);
+ endCatches = NULL;
encFinallies++;
}
@@ -6974,9 +6977,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CEE_ENDFINALLY:
- if (compIsForInlining())
+ if (compIsForInlining() && !opts.compInlineMethodsWithEH)
{
- assert(!"Shouldn't have exception handlers in the inliner!");
+ assert(!"Shouldn't have exception handlers in the inlinee!");
compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFINALLY);
return;
}
@@ -6998,9 +7001,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CEE_ENDFILTER:
- if (compIsForInlining())
+ if (compIsForInlining() && !opts.compInlineMethodsWithEH)
{
- assert(!"Shouldn't have exception handlers in the inliner!");
+ assert(!"Shouldn't have exception handlers in the inlinee!");
compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFILTER);
return;
}
@@ -7572,7 +7575,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
LEAVE:
- if (compIsForInlining())
+ if (compIsForInlining() && !opts.compInlineMethodsWithEH)
{
compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_LEAVE);
return;
@@ -11481,7 +11484,7 @@ inline void Compiler::impReimportMarkBlock(BasicBlock* block)
void Compiler::impVerifyEHBlock(BasicBlock* block)
{
assert(block->hasTryIndex());
- assert(!compIsForInlining());
+ assert(!compIsForInlining() || opts.compInlineMethodsWithEH);
unsigned tryIndex = block->getTryIndex();
EHblkDsc* HBtab = ehGetDsc(tryIndex);
@@ -12552,9 +12555,8 @@ void Compiler::impImport()
// If the method had EH, we may be missing some pred edges
// (notably those from BBJ_EHFINALLYRET blocks). Add them.
- // Only needed for the root method, since inlinees can't have EH.
//
- if (!compIsForInlining() && (info.compXcptnsCount > 0))
+ if (info.compXcptnsCount > 0)
{
impFixPredLists();
JITDUMP("\nAfter impImport() added blocks for try,catch,finally");
@@ -12962,7 +12964,7 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
//
// Arguments:
// fncHandle -- inline candidate method
-// methInfo -- method info from VN
+// methInfo -- method info from VM
// forceInline -- true if method is marked with AggressiveInlining
// inlineResult -- ongoing inline evaluation
//
@@ -12976,10 +12978,13 @@ void Compiler::impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle,
// We shouldn't have made up our minds yet...
assert(!inlineResult->IsDecided());
- if (methInfo->EHcount)
+ if (methInfo->EHcount > 0)
{
- inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH);
- return;
+ if (!opts.compInlineMethodsWithEH)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH);
+ return;
+ }
}
if ((methInfo->ILCode == nullptr) || (codeSize == 0))
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 603cd256109134..4db4d09c751871 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -3671,6 +3671,8 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
case NI_System_Span_get_Item:
case NI_System_ReadOnlySpan_get_Item:
{
+ optMethodFlags |= OMF_HAS_ARRAYREF;
+
// Have index, stack pointer-to Span s on the stack. Expand to:
//
// For Span
@@ -7763,6 +7765,25 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call,
return;
}
+ if (inlineCandidateInfo->methInfo.EHcount > 0)
+ {
+ // We cannot inline methods with EH into filter clauses, even if marked as aggressive inline
+ //
+ if (bbInFilterBBRange(compCurBB))
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_WITHIN_FILTER);
+ return;
+ }
+
+ // Do not inline pinvoke stubs with EH.
+ //
+ if ((methAttr & CORINFO_FLG_PINVOKE) != 0)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH);
+ return;
+ }
+ }
+
// The old value should be null OR this call should be a guarded devirtualization candidate.
assert(call->IsGuardedDevirtualizationCandidate() || (call->GetSingleInlineCandidateInfo() == nullptr));
@@ -10588,7 +10609,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
{
namespaceName += 1;
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
if (strcmp(namespaceName, "Buffers.Binary") == 0)
{
if (strcmp(className, "BinaryPrimitives") == 0)
@@ -10638,17 +10659,16 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
if (isVectorT || (strcmp(className, "Vector") == 0))
{
- if (strncmp(methodName,
- "System.Runtime.Intrinsics.ISimdVector APIs to still be expanded where
// possible but, they all prefix the qualified name of the interface first, so we'll
// check for that and skip the prefix before trying to resolve the method.
- if (strncmp(methodName + 70, ",T>.", 7) == 0)
+ if (strncmp(methodName + 60, ",T>.", 7) == 0)
{
- methodName += 77;
+ methodName += 67;
}
}
diff --git a/src/coreclr/jit/inline.def b/src/coreclr/jit/inline.def
index 2b045ad5d20009..44d6e83929e0ba 100644
--- a/src/coreclr/jit/inline.def
+++ b/src/coreclr/jit/inline.def
@@ -133,6 +133,7 @@ INLINE_OBSERVATION(CANT_CLASS_INIT, bool, "can't class init",
INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLSITE)
INLINE_OBSERVATION(COMPILATION_FAILURE, bool, "failed to compile", FATAL, CALLSITE)
INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix", FATAL, CALLSITE)
+INLINE_OBSERVATION(EH_TABLE_FULL, bool, "callee has eh, eh table is full", FATAL, CALLSITE)
INLINE_OBSERVATION(GENERIC_DICTIONARY_LOOKUP, bool, "runtime dictionary lookup", FATAL, CALLSITE)
INLINE_OBSERVATION(HAS_CALL_VIA_LDVIRTFTN, bool, "call via ldvirtftn", FATAL, CALLSITE)
INLINE_OBSERVATION(HAS_COMPLEX_HANDLE, bool, "complex handle access", FATAL, CALLSITE)
diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp
index d22676a62a3ea2..357cf6090b7957 100644
--- a/src/coreclr/jit/inlinepolicy.cpp
+++ b/src/coreclr/jit/inlinepolicy.cpp
@@ -911,21 +911,6 @@ int DefaultPolicy::DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* meth
void DefaultPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
{
-
-#if defined(DEBUG)
-
- // Punt if we're inlining and we've reached the acceptance limit.
- int limit = JitConfig.JitInlineLimit();
- unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
-
- if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast(limit)))
- {
- SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
- return;
- }
-
-#endif // defined(DEBUG)
-
assert(InlDecisionIsCandidate(m_Decision));
assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
@@ -1134,20 +1119,6 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
assert(InlDecisionIsCandidate(m_Decision));
assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
-#if defined(DEBUG)
-
- // Punt if we're inlining and we've reached the acceptance limit.
- int limit = JitConfig.JitInlineLimit();
- unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
-
- if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast(limit)))
- {
- SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
- return;
- }
-
-#endif // defined(DEBUG)
-
// Budget check.
const bool overBudget = this->BudgetCheck();
if (overBudget)
@@ -2400,21 +2371,6 @@ bool DiscretionaryPolicy::PropagateNeverToRuntime() const
void DiscretionaryPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
{
-
-#if defined(DEBUG)
-
- // Punt if we're inlining and we've reached the acceptance limit.
- int limit = JitConfig.JitInlineLimit();
- unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
-
- if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast(limit)))
- {
- SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
- return;
- }
-
-#endif // defined(DEBUG)
-
// Make additional observations based on the method info
MethodInfoObservations(methodInfo);
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index fe3da0a63eb904..bd614016bdb24d 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -920,6 +920,9 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
break;
}
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
@@ -927,7 +930,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
// The hwintrinsic should be contained and its
// op1 should be either contained or spilled. This
// allows us to transparently "look through" the
- // CreateScalarUnsafe and treat it directly like
+ // CreateScalar/Unsafe and treat it directly like
// a load from memory.
assert(hwintrinsic->isContained());
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index e90102caf241fe..305a8f7124d7df 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -532,7 +532,6 @@ enum insOpts : unsigned
INS_OPTS_JALR, // see ::emitIns_J_R().
INS_OPTS_J, // see ::emitIns_J().
INS_OPTS_J_cond, // see ::emitIns_J_cond_la().
- INS_OPTS_I, // see ::emitLoadImmediate().
INS_OPTS_C, // see ::emitIns_Call().
INS_OPTS_RELOC, // see ::emitIns_R_AI().
};
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 5957b4deb9799b..8ce09fa6468934 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -228,10 +228,10 @@ INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE,
INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Multiply packed singles
INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single
INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Or packed singles
-INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG)
-INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG)
-INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG)
-INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG)
+INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_REX2)
+INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_REX2)
+INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_REX2)
+INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_REX2)
INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_TT_NONE, REX_WIG | Encoding_VEX) // Reciprocal of packed singles
INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single
INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_TT_NONE, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles
@@ -280,16 +280,16 @@ INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE,
INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Return Minimum packed doubles
INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double
INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX)
-INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // Move DWORD/QWORD between xmm regs <-> memory/r32/r64 regs
-INST3(movdqa, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
-INST3(movdqu, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
+INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move DWORD/QWORD between xmm regs <-> memory/r32/r64 regs
+INST3(movdqa, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2)
+INST3(movdqu, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2 )
INST3(movhpd, "movhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movlpd, "movlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_TT_NONE, REX_WIG | Encoding_VEX) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
INST3(movntdq, "movntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
-INST3(movnti, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WX)
+INST3(movnti, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WX | Encoding_REX2)
INST3(movntpd, "movntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX)
-INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Move Quadword between memory/mm <-> regs
+INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move Quadword between memory/mm <-> regs
INST3(movsd_simd, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Multiply packed doubles
@@ -602,16 +602,14 @@ INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE,
INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Reset Lowest Set Bit
// BMI2
-INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Zero High Bits Starting with Specified Bit Position
-INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags
-INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
-INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
-INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_TT_NONE, REX_WX | Encoding_VEX)
-#ifdef TARGET_AMD64
-INST3(sarx, "sarx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Arithmetic Right Without Affecting Flags
-INST3(shlx, "shlx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Left Without Affecting Flags
-INST3(shrx, "shrx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Right Without Affecting Flags
-#endif
+INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Zero High Bits Starting with Specified Bit Position
+INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags
+INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
+INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
+INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX)
+INST3(sarx, "sarx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Arithmetic Right Without Affecting Flags
+INST3(shlx, "shlx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Left Without Affecting Flags
+INST3(shrx, "shrx", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift Logical Right Without Affecting Flags
INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
@@ -622,8 +620,8 @@ INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BA
// AVX512F
INST3(kandw, "kandw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks
INST3(kandnw, "kandnw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks
-INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers
+INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
+INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
INST3(knotw, "knotw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register
INST3(korw, "korw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks
INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags
@@ -772,10 +770,10 @@ INST3(kandd, "kandd", IUM_WR, BAD_CODE, BAD_
INST3(kandq, "kandq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks
INST3(kandnd, "kandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks
INST3(kandnq, "kandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks
-INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers
+INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
+INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
+INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
+INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
INST3(knotd, "knotd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register
INST3(knotq, "knotq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register
INST3(kord, "kord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks
@@ -838,8 +836,8 @@ INST3(kaddb, "kaddb", IUM_WR, BAD_CODE, BAD_
INST3(kaddw, "kaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Add two masks
INST3(kandb, "kandb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks
INST3(kandnb, "kandnb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks
-INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers
-INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers
+INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
+INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers
INST3(knotb, "knotb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register
INST3(korb, "korb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks
INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags
@@ -919,7 +917,7 @@ INST3(vcvttps2ibs, "cvttps2ibs", IUM_WR, BAD_CODE, BAD_
INST3(vcvttps2iubs, "cvttps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD
INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference
-INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double
+INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double
INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single
INST3(vminmaxpd, "minmaxpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed doubles
INST3(vminmaxps, "minmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed singles
@@ -969,24 +967,30 @@ INST3(LAST_APX_INSTRUCTION, "LAST_APX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE,
// Scalar instructions in SSE4.2
INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), INS_TT_NONE, INS_FLAGS_None)
+#ifdef TARGET_AMD64
+INST3(crc32_apx, "crc32", IUM_RW, BAD_CODE, BAD_CODE, 0x0000F0, INS_TT_NONE, INS_FLAGS_None)
+#endif
// BMI1
INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | Encoding_REX2) // Count the Number of Trailing Zero Bits
+#ifdef TARGET_AMD64
+INST3(tzcnt_apx, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F4, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Count the Number of Trailing Zero Bits
+#endif
// LZCNT
INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | Encoding_REX2)
+#ifdef TARGET_AMD64
+INST3(lzcnt_apx, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F5, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF)
+#endif
// MOVBE
INST3(movbe, "movbe", IUM_WR, PCKMVB(0xF1), BAD_CODE, PCKMVB(0xF0), INS_TT_NONE, INS_FLAGS_None)
// POPCNT
INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | Encoding_REX2)
-
-#if defined(TARGET_AMD64)
-INST3(tzcnt_apx, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F4, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Count the Number of Trailing Zero Bits
-INST3(lzcnt_apx, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F5, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF)
-INST3(popcnt_apx, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x000088, INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | INS_Flags_Has_NF)
-#endif // TARGET_AMD64
+#ifdef TARGET_AMD64
+INST3(popcnt_apx, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x000088, INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | INS_Flags_Has_NF)
+#endif
INST3(neg, "neg", IUM_RW, 0x0018F6, BAD_CODE, 0x0018F6, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
INST3(not, "not", IUM_RW, 0x0010F6, BAD_CODE, 0x0010F6, INS_TT_NONE, INS_FLAGS_None | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD)
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 5fe7439d2bc658..f4fda0855d46cc 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -119,6 +119,9 @@ CONFIG_INTEGER(JitInlinePrintStats, "JitInlinePrintStats", 0)
CONFIG_INTEGER(JitInlineSize, "JITInlineSize", DEFAULT_MAX_INLINE_SIZE)
CONFIG_INTEGER(JitInlineDepth, "JITInlineDepth", DEFAULT_MAX_INLINE_DEPTH)
CONFIG_INTEGER(JitForceInlineDepth, "JITForceInlineDepth", DEFAULT_MAX_FORCE_INLINE_DEPTH)
+RELEASE_CONFIG_INTEGER(JitInlineMethodsWithEH, "JitInlineMethodsWithEH", 1)
+CONFIG_STRING(JitInlineMethodsWithEHRange, "JitInlineMethodsWithEHRange")
+
CONFIG_INTEGER(JitLongAddress, "JitLongAddress", 0) // Force using the large pseudo instruction form for long address
CONFIG_INTEGER(JitMaxUncheckedOffset, "JitMaxUncheckedOffset", 8)
@@ -381,8 +384,6 @@ CONFIG_INTEGER(JitStressPromotedEvexEncoding, "JitStressPromotedEvexEncoding", 0
CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0)
#endif
-RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, "PreferredVectorBitWidth", 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default.
-
//
// Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h
//
@@ -798,9 +799,6 @@ RELEASE_CONFIG_INTEGER(JitEnablePhysicalPromotion, "JitEnablePhysicalPromotion",
// Enable cross-block local assertion prop
RELEASE_CONFIG_INTEGER(JitEnableCrossBlockLocalAssertionProp, "JitEnableCrossBlockLocalAssertionProp", 1)
-// Do greedy RPO-based layout in Compiler::fgReorderBlocks.
-RELEASE_CONFIG_INTEGER(JitDoReversePostOrderLayout, "JitDoReversePostOrderLayout", 1);
-
// Enable strength reduction
RELEASE_CONFIG_INTEGER(JitEnableStrengthReduction, "JitEnableStrengthReduction", 1)
diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h
index edfbafc917d470..8c6e82ec7b9dd3 100644
--- a/src/coreclr/jit/jitee.h
+++ b/src/coreclr/jit/jitee.h
@@ -44,10 +44,6 @@ class JitFlags
JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention
#endif
-#if defined(TARGET_XARCH)
- JIT_FLAG_VECTOR512_THROTTLING = 31, // On Xarch, 512-bit vector usage may incur CPU frequency throttling
-#endif
-
// Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags.
// See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap,
// something needs to change.
@@ -147,10 +143,6 @@ class JitFlags
FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SOFTFP_ABI, JIT_FLAG_SOFTFP_ABI);
#endif // TARGET_ARM
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
- FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING, JIT_FLAG_VECTOR512_THROTTLING);
-#endif // TARGET_ARM
-
#undef FLAGS_EQUAL
}
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp
index 3356349e61adc4..5290a29ca24a34 100644
--- a/src/coreclr/jit/jiteh.cpp
+++ b/src/coreclr/jit/jiteh.cpp
@@ -241,7 +241,7 @@ bool EHblkDsc::ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast)
void EHblkDsc::DispEntry(unsigned XTnum)
{
- printf(" %2u ::", XTnum);
+ printf(" %2u %2u ::", ebdID, XTnum);
#if defined(FEATURE_EH_WINDOWS_X86)
if (ebdHandlerNestingLevel == 0)
@@ -1258,6 +1258,30 @@ EHblkDsc* Compiler::ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, Ba
return tryTab;
}
+//------------------------------------------------------------------------
+// ehFindEHblkDscById: find an eh table entry by its ID
+//
+// Argument:
+//    id - the ID to search for
+//
+// Returns:
+// Pointer to the entry, or nullptr
+//
+EHblkDsc* Compiler::ehFindEHblkDscById(unsigned short id)
+{
+ EHblkDsc* result = nullptr;
+ for (EHblkDsc* const xtab : EHClauses(this))
+ {
+ if (xtab->ebdID == id)
+ {
+ result = xtab;
+ break;
+ }
+ }
+
+ return result;
+}
+
/*****************************************************************************
* This method updates the value of ebdTryBeg
*/
@@ -1339,8 +1363,8 @@ void Compiler::fgFindTryRegionEnds()
// Null out try end pointers to signify the given clause hasn't been visited yet.
for (EHblkDsc* const HBtab : EHClauses(this))
{
- // Ignore try regions inside handler regions.
- if (!HBtab->ebdTryLast->hasHndIndex())
+ // Ignore try regions inside funclet regions.
+ if (!UsesFunclets() || !HBtab->ebdTryLast->hasHndIndex())
{
HBtab->ebdTryLast = nullptr;
unsetTryEnds++;
@@ -1728,8 +1752,9 @@ EHblkDsc* Compiler::fgTryAddEHTableEntries(unsigned XTnum, unsigned count, bool
if (deferAdding)
{
// We can add count entries...
+ // (we may not have allocated a table, so return a dummy non-null entry)
//
- return compHndBBtab;
+ return (EHblkDsc*)(0x1);
}
if (newCount > compHndBBtabAllocCount)
@@ -3208,12 +3233,6 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus
void Compiler::fgVerifyHandlerTab()
{
- if (compIsForInlining())
- {
- // We don't inline functions with EH. Don't bother verifying the EH table in the inlinee Compiler.
- return;
- }
-
if (compHndBBtabCount == 0)
{
return;
@@ -3230,6 +3249,9 @@ void Compiler::fgVerifyHandlerTab()
// block (case 3)?
bool multipleLastBlockNormalizationDone = false; // Currently disabled
+ BitVecTraits traits(impInlineRoot()->compEHID, this);
+ BitVec ids(BitVecOps::MakeEmpty(&traits));
+
assert(compHndBBtabCount <= compHndBBtabAllocCount);
unsigned XTnum;
@@ -3237,6 +3259,11 @@ void Compiler::fgVerifyHandlerTab()
for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
{
+ // EH IDs should be unique and in range
+ //
+ assert(HBtab->ebdID < impInlineRoot()->compEHID);
+ assert(BitVecOps::TryAddElemD(&traits, ids, HBtab->ebdID));
+
assert(HBtab->ebdTryBeg != nullptr);
assert(HBtab->ebdTryLast != nullptr);
assert(HBtab->ebdHndBeg != nullptr);
@@ -3763,7 +3790,7 @@ void Compiler::fgDispHandlerTab()
return;
}
- printf("\nindex ");
+ printf("\n id, index ");
#if defined(FEATURE_EH_WINDOWS_X86)
if (!UsesFunclets())
{
diff --git a/src/coreclr/jit/jiteh.h b/src/coreclr/jit/jiteh.h
index eb4c1bfbd5baf6..34cae18ec950cb 100644
--- a/src/coreclr/jit/jiteh.h
+++ b/src/coreclr/jit/jiteh.h
@@ -89,6 +89,8 @@ struct EHblkDsc
unsigned ebdTyp; // Exception type (a class token), otherwise
};
+ unsigned short ebdID; // Unique ID for this eh descriptor (stable across add/delete/inlining)
+
EHHandlerType ebdHandlerType;
#if defined(FEATURE_EH_WINDOWS_X86)
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 6a97cfa276a49a..1e6d365b9d133f 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -4771,7 +4771,7 @@ bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask)
//
void Compiler::lvaUpdateArgWithInitialReg(LclVarDsc* varDsc)
{
- noway_assert(varDsc->lvIsParam);
+ assert(varDsc->lvIsParam || varDsc->lvIsParamRegTarget);
if (varDsc->lvIsRegCandidate())
{
@@ -4790,20 +4790,11 @@ void Compiler::lvaUpdateArgsWithInitialReg()
return;
}
- for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
{
LclVarDsc* varDsc = lvaGetDesc(lclNum);
- if (varDsc->lvPromoted)
- {
- for (unsigned fieldVarNum = varDsc->lvFieldLclStart;
- fieldVarNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldVarNum)
- {
- LclVarDsc* fieldVarDsc = lvaGetDesc(fieldVarNum);
- lvaUpdateArgWithInitialReg(fieldVarDsc);
- }
- }
- else
+ if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget)
{
lvaUpdateArgWithInitialReg(varDsc);
}
diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp
index c999e1b1721c23..8e7e894dde2ce1 100644
--- a/src/coreclr/jit/loopcloning.cpp
+++ b/src/coreclr/jit/loopcloning.cpp
@@ -45,6 +45,22 @@ void ArrIndex::PrintBoundsCheckNodes(unsigned dim /* = -1 */)
}
}
+//--------------------------------------------------------------------------------------------------
+// Print: debug print a SpanIndex struct in the form: `V01[V02]`.
+//
+void SpanIndex::Print()
+{
+ printf("V%02d[V%02d]", lenLcl, indLcl);
+}
+
+//--------------------------------------------------------------------------------------------------
+// PrintBoundsCheckNode: debug print a SpanIndex struct's bounds check node tree id
+//
+void SpanIndex::PrintBoundsCheckNode()
+{
+ Compiler::printTreeID(bndsChk);
+}
+
#endif // DEBUG
//--------------------------------------------------------------------------------------------------
@@ -80,6 +96,10 @@ GenTree* LC_Array::ToGenTree(Compiler* comp, BasicBlock* bb)
arrAddr->gtFlags |= GTF_INX_ADDR_NONNULL;
arr = comp->gtNewIndexIndir(arrAddr->AsIndexAddr());
+
+ // We don't really need to call morph here if we import arr[i] directly
+    // without gtNewArrayIndexAddr (but it's a bit verbose).
+ arr = comp->fgMorphTree(arr);
}
// If asked for arrlen invoke arr length operator.
if (oper == ArrLen)
@@ -111,6 +131,20 @@ GenTree* LC_Array::ToGenTree(Compiler* comp, BasicBlock* bb)
return nullptr;
}
+//--------------------------------------------------------------------------------------------------
+// ToGenTree: Convert a Span.Length operation into a GenTree node.
+//
+// Arguments:
+// comp - Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for Span.Length
+//
+GenTree* LC_Span::ToGenTree(Compiler* comp)
+{
+ return comp->gtNewLclvNode(spanIndex->lenLcl, comp->lvaTable[spanIndex->lenLcl].lvType);
+}
+
//--------------------------------------------------------------------------------------------------
// ToGenTree - Convert an "identifier" into a GenTree node.
//
@@ -134,6 +168,8 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb)
return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType);
case ArrAccess:
return arrAccess.ToGenTree(comp, bb);
+ case SpanAccess:
+ return spanAccess.ToGenTree(comp);
case Null:
return comp->gtNewIconNode(0, TYP_REF);
case ClassHandle:
@@ -861,54 +897,12 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler*
//
const weight_t fastLikelihood = fastPathWeightScaleFactor;
- // Choose how to generate the conditions
- const bool generateOneConditionPerBlock = true;
-
- if (generateOneConditionPerBlock)
- {
- // N = conds.Size() branches must all be true to execute the fast loop.
- // Use the N'th root....
- //
- const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size());
-
- for (unsigned i = 0; i < conds.Size(); ++i)
- {
- BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true);
- newBlk->inheritWeight(insertAfter);
-
- JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum);
- FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk);
- newBlk->SetTrueEdge(trueEdge);
- trueEdge->setLikelihood(1 - fastLikelihoodPerBlock);
-
- if (insertAfter->KindIs(BBJ_COND))
- {
- JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum);
- FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter);
- insertAfter->SetFalseEdge(falseEdge);
- falseEdge->setLikelihood(fastLikelihoodPerBlock);
- }
-
- JITDUMP("Adding conditions %u to " FMT_BB "\n", i, newBlk->bbNum);
-
- GenTree* cond = conds[i].ToGenTree(comp, newBlk, /* invert */ true);
- GenTree* jmpTrueTree = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, cond);
- Statement* stmt = comp->fgNewStmtFromTree(jmpTrueTree);
-
- comp->fgInsertStmtAtEnd(newBlk, stmt);
-
- // Remorph.
- JITDUMP("Loop cloning condition tree before morphing:\n");
- DBEXEC(comp->verbose, comp->gtDispTree(jmpTrueTree));
- JITDUMP("\n");
- comp->fgMorphBlockStmt(newBlk, stmt DEBUGARG("Loop cloning condition"));
-
- insertAfter = newBlk;
- }
+ // N = conds.Size() branches must all be true to execute the fast loop.
+ // Use the N'th root....
+ //
+ const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size());
- return insertAfter;
- }
- else
+ for (unsigned i = 0; i < conds.Size(); ++i)
{
BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true);
newBlk->inheritWeight(insertAfter);
@@ -916,43 +910,28 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler*
JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum);
FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk);
newBlk->SetTrueEdge(trueEdge);
- trueEdge->setLikelihood(1.0 - fastLikelihood);
+ trueEdge->setLikelihood(1 - fastLikelihoodPerBlock);
if (insertAfter->KindIs(BBJ_COND))
{
JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum);
FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter);
insertAfter->SetFalseEdge(falseEdge);
- falseEdge->setLikelihood(fastLikelihood);
+ falseEdge->setLikelihood(fastLikelihoodPerBlock);
}
- JITDUMP("Adding conditions to " FMT_BB "\n", newBlk->bbNum);
+ JITDUMP("Adding conditions %u to " FMT_BB "\n", i, newBlk->bbNum);
- // Get the first condition.
- GenTree* cond = conds[0].ToGenTree(comp, newBlk, /* invert */ false);
- for (unsigned i = 1; i < conds.Size(); ++i)
- {
- // Append all conditions using AND operator.
- cond = comp->gtNewOperNode(GT_AND, TYP_INT, cond, conds[i].ToGenTree(comp, newBlk, /* invert */ false));
- }
-
- // Add "cond == 0" node
- cond = comp->gtNewOperNode(GT_EQ, TYP_INT, cond, comp->gtNewIconNode(0));
-
- // Add jmpTrue "cond == 0"
+ GenTree* cond = conds[i].ToGenTree(comp, newBlk, /* invert */ true);
+ cond->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
GenTree* jmpTrueTree = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, cond);
Statement* stmt = comp->fgNewStmtFromTree(jmpTrueTree);
comp->fgInsertStmtAtEnd(newBlk, stmt);
-
- // Remorph.
- JITDUMP("Loop cloning condition tree before morphing:\n");
- DBEXEC(comp->verbose, comp->gtDispTree(jmpTrueTree));
- JITDUMP("\n");
- comp->fgMorphBlockStmt(newBlk, stmt DEBUGARG("Loop cloning condition"));
-
- return newBlk;
+ insertAfter = newBlk;
}
+
+ return insertAfter;
}
//--------------------------------------------------------------------------------------------------
@@ -1133,6 +1112,10 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex());
assert(optInfos->Size() > 0);
+ // If we have spans, that means we have to be careful about the stride (see below).
+ //
+ bool hasSpans = false;
+
// We only need to check for iteration behavior if we have array checks.
//
bool checkIterationBehavior = false;
@@ -1147,6 +1130,11 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
checkIterationBehavior = true;
break;
+ case LcOptInfo::LcSpan:
+ checkIterationBehavior = true;
+ hasSpans = true;
+ break;
+
case LcOptInfo::LcTypeTest:
{
LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo();
@@ -1207,16 +1195,22 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
}
const bool isIncreasingLoop = iterInfo->IsIncreasingLoop();
- assert(isIncreasingLoop || iterInfo->IsDecreasingLoop());
+ if (!isIncreasingLoop && !iterInfo->IsDecreasingLoop())
+ {
+ // Normally, we reject weird-looking loops in optIsLoopClonable, but it's not the case
+ // when we have both GDVs and array checks inside such loops.
+ return false;
+ }
// We already know that this is either increasing or decreasing loop and the
// stride is (> 0) or (< 0). Here, just take the abs() value and check if it
// is beyond the limit.
int stride = abs(iterInfo->IterConst());
- if (stride >= 58)
+ static_assert_no_msg(INT32_MAX >= CORINFO_Array_MaxLength);
+ if (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1))
{
- // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure
+ // Array.MaxLength can have maximum of 0x7fffffc7 elements, so make sure
// the stride increment doesn't overflow or underflow the index. Hence,
// the maximum stride limit is set to
// (int.MaxValue - (Array.MaxLength - 1) + 1), which is
@@ -1224,6 +1218,14 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
return false;
}
+ // We don't know exactly whether we might be dealing with a Span or not,
+ // but if we suspect we are, we need to be careful about the stride:
+    // as Span<>.Length can be INT32_MAX, unlike arrays.
+ if (hasSpans && (stride > 1))
+ {
+ return false;
+ }
+
LC_Ident ident;
// Init conditions
if (iterInfo->HasConstInit)
@@ -1366,6 +1368,15 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
context->EnsureArrayDerefs(loop->GetIndex())->Push(array);
}
break;
+ case LcOptInfo::LcSpan:
+ {
+ LcSpanOptInfo* spanInfo = optInfo->AsLcSpanOptInfo();
+ LC_Span spanLen(&spanInfo->spanIndex);
+ LC_Ident spanLenIdent = LC_Ident::CreateSpanAccess(spanLen);
+ LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(spanLenIdent));
+ context->EnsureConditions(loop->GetIndex())->Push(cond);
+ }
+ break;
case LcOptInfo::LcMdArray:
{
LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
@@ -1508,10 +1519,6 @@ bool Compiler::optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneCo
JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loop->GetIndex());
JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loop->GetIndex());
- // We currently expect to have at least one of these.
- //
- assert((arrayDeref->Size() != 0) || (objDeref->Size() != 0));
-
// Generate the array dereference checks.
//
// For each array in the dereference list, construct a tree,
@@ -1732,6 +1739,39 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop,
DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
}
break;
+ case LcOptInfo::LcSpan:
+ {
+ LcSpanOptInfo* spanIndexInfo = optInfo->AsLcSpanOptInfo();
+ compCurBB = spanIndexInfo->spanIndex.useBlock;
+ GenTree* bndsChkNode = spanIndexInfo->spanIndex.bndsChk;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Remove bounds check ");
+ printTreeID(bndsChkNode->gtGetOp1());
+ printf(" for " FMT_STMT ", ", spanIndexInfo->stmt->GetID());
+ spanIndexInfo->spanIndex.Print();
+ printf(", bounds check nodes: ");
+ spanIndexInfo->spanIndex.PrintBoundsCheckNode();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ if (bndsChkNode->gtGetOp1()->OperIs(GT_BOUNDS_CHECK))
+ {
+ optRemoveCommaBasedRangeCheck(bndsChkNode, spanIndexInfo->stmt);
+ }
+ else
+ {
+ JITDUMP(" Bounds check already removed\n");
+
+ // If the bounds check node isn't there, it better have been converted to a GT_NOP.
+ assert(bndsChkNode->gtGetOp1()->OperIs(GT_NOP));
+ }
+ DBEXEC(dynamicPath, optDebugLogLoopCloning(spanIndexInfo->spanIndex.useBlock, spanIndexInfo->stmt));
+ }
+ break;
case LcOptInfo::LcMdArray:
// TODO-CQ: CLONE: Implement.
break;
@@ -1966,7 +2006,6 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex
BasicBlock* insertAfter)
{
JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex());
- assert(context->HasBlockConditions(loop->GetIndex()));
assert(slowPreheader != nullptr);
if (context->HasBlockConditions(loop->GetIndex()))
@@ -2140,9 +2179,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
// ...
// slowPreheader --> slowHeader
//
- // We should always have block conditions.
-
- assert(context->HasBlockConditions(loop->GetIndex()));
// If any condition is false, go to slowPreheader (which branches or falls through to header of the slow loop).
BasicBlock* slowHeader = nullptr;
@@ -2325,6 +2361,44 @@ bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsN
return true;
}
+//---------------------------------------------------------------------------------------------------------------
+// optExtractSpanIndex: Try to extract the Span element access from "tree".
+//
+// Arguments:
+// tree - the tree to be checked if it is the Span [] operation.
+// result - the extracted information is updated in result.
+//
+// Return Value:
+// Returns true if Span index can be extracted, else, return false.
+//
+// Notes:
+// The way loop cloning works for Span is that we don't actually know (or care)
+// if it's a Span or an array, we just extract index and length locals out
+//    of the GT_BOUNDS_CHECK node. The fact that the length is a local var
+//    allows us to not worry about array/span dereferencing.
+//
+bool Compiler::optExtractSpanIndex(GenTree* tree, SpanIndex* result)
+{
+ // Bounds checks are almost always wrapped in a comma node
+ // and are the first operand.
+ if (!tree->OperIs(GT_COMMA) || !tree->gtGetOp1()->OperIs(GT_BOUNDS_CHECK))
+ {
+ return false;
+ }
+
+ GenTreeBoundsChk* arrBndsChk = tree->gtGetOp1()->AsBoundsChk();
+ if (!arrBndsChk->GetIndex()->OperIs(GT_LCL_VAR) || !arrBndsChk->GetArrayLength()->OperIs(GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ result->lenLcl = arrBndsChk->GetArrayLength()->AsLclVarCommon()->GetLclNum();
+ result->indLcl = arrBndsChk->GetIndex()->AsLclVarCommon()->GetLclNum();
+ result->bndsChk = tree;
+ result->useBlock = compCurBB;
+ return true;
+}
+
//---------------------------------------------------------------------------------------------------------------
// optReconstructArrIndexHelp: Helper function for optReconstructArrIndex. See that function for more details.
//
@@ -2588,6 +2662,30 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop
return WALK_SKIP_SUBTREES;
}
+ SpanIndex spanIndex = SpanIndex();
+ if (info->cloneForArrayBounds && optExtractSpanIndex(tree, &spanIndex))
+ {
+ // Check that the span's length local variable is invariant within the loop body.
+ if (!optIsStackLocalInvariant(info->loop, spanIndex.lenLcl))
+ {
+ JITDUMP("Span.Length V%02d is not loop invariant\n", spanIndex.lenLcl);
+ return WALK_SKIP_SUBTREES;
+ }
+
+ unsigned iterVar = info->context->GetLoopIterInfo(info->loop->GetIndex())->IterVar;
+ if (spanIndex.indLcl == iterVar)
+ {
+ // Update the loop context.
+ info->context->EnsureLoopOptInfo(info->loop->GetIndex())
+ ->Push(new (this, CMK_LoopOpt) LcSpanOptInfo(spanIndex, info->stmt));
+ }
+ else
+ {
+ JITDUMP("Induction V%02d is not used as index\n", iterVar);
+ }
+ return WALK_SKIP_SUBTREES;
+ }
+
if (info->cloneForGDVTests && tree->OperIs(GT_JTRUE))
{
JITDUMP("...GDV considering [%06u]\n", dspTreeID(tree));
diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h
index cfda1be87a8b9d..ecdda09775f87a 100644
--- a/src/coreclr/jit/loopcloning.h
+++ b/src/coreclr/jit/loopcloning.h
@@ -211,6 +211,28 @@ struct ArrIndex
#endif
};
+// SpanIndex represents a span element access and associated bounds check.
+struct SpanIndex
+{
+ unsigned lenLcl; // The Span length local num
+ unsigned indLcl; // The index local num
+ GenTree* bndsChk; // The bounds check node
+ BasicBlock* useBlock; // Block where the [] occurs
+
+ SpanIndex()
+ : lenLcl(BAD_VAR_NUM)
+ , indLcl(BAD_VAR_NUM)
+ , bndsChk(nullptr)
+ , useBlock(nullptr)
+ {
+ }
+
+#ifdef DEBUG
+ void Print();
+ void PrintBoundsCheckNode();
+#endif
+};
+
// Forward declarations
#define LC_OPT(en) struct en##OptInfo;
#include "loopcloningopts.h"
@@ -317,6 +339,21 @@ struct LcJaggedArrayOptInfo : public LcOptInfo
}
};
+// Optimization info for a Span
+//
+struct LcSpanOptInfo : public LcOptInfo
+{
+ SpanIndex spanIndex; // SpanIndex representation of the Span.
+ Statement* stmt; // "stmt" where the optimization opportunity occurs.
+
+ LcSpanOptInfo(SpanIndex& spanIndex, Statement* stmt)
+ : LcOptInfo(LcSpan)
+ , spanIndex(spanIndex)
+ , stmt(stmt)
+ {
+ }
+};
+
// Optimization info for a type test
//
struct LcTypeTestOptInfo : public LcOptInfo
@@ -481,6 +518,38 @@ struct LC_Array
GenTree* ToGenTree(Compiler* comp, BasicBlock* bb);
};
+// Symbolic representation of Span.Length
+struct LC_Span
+{
+ SpanIndex* spanIndex;
+
+#ifdef DEBUG
+ void Print()
+ {
+ spanIndex->Print();
+ }
+#endif
+
+ LC_Span()
+ : spanIndex(nullptr)
+ {
+ }
+
+ LC_Span(SpanIndex* arrIndex)
+ : spanIndex(arrIndex)
+ {
+ }
+
+ // Equality operator
+ bool operator==(const LC_Span& that) const
+ {
+ return (spanIndex->lenLcl == that.spanIndex->lenLcl) && (spanIndex->indLcl == that.spanIndex->indLcl);
+ }
+
+ // Get a tree representation for this symbolic Span.Length
+ GenTree* ToGenTree(Compiler* comp);
+};
+
//------------------------------------------------------------------------
// LC_Ident: symbolic representation of "a value"
//
@@ -492,6 +561,7 @@ struct LC_Ident
Const,
Var,
ArrAccess,
+ SpanAccess,
Null,
ClassHandle,
IndirOfLocal,
@@ -509,6 +579,7 @@ struct LC_Ident
unsigned indirOffs;
};
LC_Array arrAccess;
+ LC_Span spanAccess;
CORINFO_CLASS_HANDLE clsHnd;
struct
{
@@ -553,6 +624,8 @@ struct LC_Ident
return (lclNum == that.lclNum) && (indirOffs == that.indirOffs);
case ArrAccess:
return (arrAccess == that.arrAccess);
+ case SpanAccess:
+ return (spanAccess == that.spanAccess);
case Null:
return true;
case MethodAddr:
@@ -598,6 +671,9 @@ struct LC_Ident
case ArrAccess:
arrAccess.Print();
break;
+ case SpanAccess:
+ spanAccess.Print();
+ break;
case Null:
printf("null");
break;
@@ -646,6 +722,13 @@ struct LC_Ident
return id;
}
+ static LC_Ident CreateSpanAccess(const LC_Span& spanLen)
+ {
+ LC_Ident id(SpanAccess);
+ id.spanAccess = spanLen;
+ return id;
+ }
+
static LC_Ident CreateNull()
{
return LC_Ident(Null);
diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h
index 2fb13937e2f86a..e27a3d802e11c4 100644
--- a/src/coreclr/jit/loopcloningopts.h
+++ b/src/coreclr/jit/loopcloningopts.h
@@ -13,5 +13,6 @@ LC_OPT(LcMdArray)
LC_OPT(LcJaggedArray)
LC_OPT(LcTypeTest)
LC_OPT(LcMethodAddrTest)
+LC_OPT(LcSpan)
#undef LC_OPT
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 760bf995e50d7b..8b1dc5d10b1e06 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -7810,10 +7810,10 @@ bool Lowering::TryFoldBinop(GenTreeOp* node)
return true;
}
- if (node->OperIs(GT_LSH, GT_RSH, GT_RSZ, GT_ROL, GT_ROR, GT_OR, GT_XOR) &&
- (op1->IsIntegralConst(0) || op2->IsIntegralConst(0)))
+ if ((node->OperIs(GT_LSH, GT_RSH, GT_RSZ, GT_ROL, GT_ROR) && op2->IsIntegralConst(0)) ||
+ (node->OperIs(GT_OR, GT_XOR) && (op1->IsIntegralConst(0) || op2->IsIntegralConst(0))))
{
- GenTree* zeroOp = op1->IsIntegralConst(0) ? op1 : op2;
+ GenTree* zeroOp = op2->IsIntegralConst(0) ? op2 : op1;
GenTree* otherOp = zeroOp == op1 ? op2 : op1;
LIR::Use use;
@@ -7968,7 +7968,7 @@ PhaseStatus Lowering::DoPhase()
}
#if !defined(TARGET_64BIT)
- DecomposeLongs decomp(comp); // Initialize the long decomposition class.
+ DecomposeLongs decomp(comp, this); // Initialize the long decomposition class.
if (comp->compLongUsed)
{
decomp.PrepareForDecomposition();
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 611ceb09339233..d44880bd947554 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -464,61 +464,6 @@ class Lowering final : public Phase
unsigned simdSize);
#endif // FEATURE_HW_INTRINSICS
- //----------------------------------------------------------------------------------------------
- // TryRemoveCastIfPresent: Removes op it is a cast operation and the size of its input is at
- // least the size of expectedType
- //
- // Arguments:
- // expectedType - The expected type of the cast operation input if it is to be removed
- // op - The tree to remove if it is a cast op whose input is at least the size of expectedType
- //
- // Returns:
- // op if it was not a cast node or if its input is not at least the size of expected type;
- // Otherwise, it returns the underlying operation that was being casted
- GenTree* TryRemoveCastIfPresent(var_types expectedType, GenTree* op)
- {
- if (!op->OperIs(GT_CAST) || !comp->opts.OptimizationEnabled())
- {
- return op;
- }
-
- GenTreeCast* cast = op->AsCast();
- GenTree* castOp = cast->CastOp();
-
- // FP <-> INT casts should be kept
- if (varTypeIsFloating(castOp) ^ varTypeIsFloating(expectedType))
- {
- return op;
- }
-
- // Keep casts which can overflow
- if (cast->gtOverflow())
- {
- return op;
- }
-
- // Keep casts with operands usable from memory.
- if (castOp->isContained() || castOp->IsRegOptional())
- {
- return op;
- }
-
- if (genTypeSize(cast->CastToType()) >= genTypeSize(expectedType))
- {
-#ifndef TARGET_64BIT
- // Don't expose TYP_LONG on 32bit
- if (castOp->TypeIs(TYP_LONG))
- {
- return op;
- }
-#endif
- BlockRange().Remove(op);
- return castOp;
- }
-
- return op;
- }
-
// Utility functions
public:
static bool IndirsAreEquivalent(GenTree* pTreeA, GenTree* pTreeB);
@@ -568,6 +513,13 @@ class Lowering final : public Phase
bool IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTree* childNode, bool* supportsRegOptional);
#endif // FEATURE_HW_INTRINSICS
+ // Checks for memory conflicts in the instructions between childNode and parentNode, and returns true if childNode
+ // can be contained.
+ bool IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) const;
+
+ // Similar to above, but allows bypassing a "transparent" parent.
+ bool IsSafeToContainMem(GenTree* grandparentNode, GenTree* parentNode, GenTree* childNode) const;
+
static void TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, BasicBlock* block);
private:
@@ -599,13 +551,6 @@ class Lowering final : public Phase
GenTree* endExclusive,
GenTree* ignoreNode) const;
- // Checks for memory conflicts in the instructions between childNode and parentNode, and returns true if childNode
- // can be contained.
- bool IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) const;
-
- // Similar to above, but allows bypassing a "transparent" parent.
- bool IsSafeToContainMem(GenTree* grandparentNode, GenTree* parentNode, GenTree* childNode) const;
-
// Check if marking an operand of a node as reg-optional is safe.
bool IsSafeToMarkRegOptional(GenTree* parentNode, GenTree* node) const;
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 963d0c4d1fbb7a..8c2528de2c2e65 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -2015,11 +2015,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
//
bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node)
{
- assert((node->GetHWIntrinsicId() == NI_Vector64_Create) || (node->GetHWIntrinsicId() == NI_Vector128_Create) ||
- (node->GetHWIntrinsicId() == NI_Vector64_CreateScalar) ||
- (node->GetHWIntrinsicId() == NI_Vector128_CreateScalar) ||
- (node->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe) ||
- (node->GetHWIntrinsicId() == NI_Vector128_CreateScalarUnsafe) ||
+ assert(HWIntrinsicInfo::IsVectorCreate(node->GetHWIntrinsicId()) ||
+ HWIntrinsicInfo::IsVectorCreateScalar(node->GetHWIntrinsicId()) ||
+ HWIntrinsicInfo::IsVectorCreateScalarUnsafe(node->GetHWIntrinsicId()) ||
(node->GetHWIntrinsicId() == NI_AdvSimd_DuplicateToVector64) ||
(node->GetHWIntrinsicId() == NI_AdvSimd_DuplicateToVector128) ||
(node->GetHWIntrinsicId() == NI_AdvSimd_Arm64_DuplicateToVector64) ||
@@ -2278,7 +2276,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
assert(simdSize != 0);
bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal);
- bool isCreateScalar = (intrinsicId == NI_Vector64_CreateScalar) || (intrinsicId == NI_Vector128_CreateScalar);
+ bool isCreateScalar = HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId);
size_t argCnt = node->GetOperandCount();
// Check if we have a cast that we can remove. Note that "IsValidConstForMovImm"
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 1a7b0e708eb6c3..743218ecede33b 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -2044,26 +2044,14 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
- case NI_SSE2_Insert:
case NI_SSE41_Insert:
- case NI_SSE41_X64_Insert:
{
assert(node->GetOperandCount() == 3);
- var_types simdBaseType = node->GetSimdBaseType();
-
- // Insert takes either a 32-bit register or a memory operand.
- // In either case, only SimdBaseType bits are read and so
- // widening or narrowing the operand may be unnecessary and it
- // can just be used directly.
-
- node->Op(2) = TryRemoveCastIfPresent(simdBaseType, node->Op(2));
-
- if (simdBaseType != TYP_FLOAT)
+ if (node->GetSimdBaseType() != TYP_FLOAT)
{
break;
}
- assert(intrinsicId == NI_SSE41_Insert);
// We have Sse41.Insert in which case we can specially handle
// a couple of interesting scenarios involving chains of Inserts
@@ -2272,19 +2260,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
- case NI_SSE42_Crc32:
- {
- assert(node->GetOperandCount() == 2);
-
- // Crc32 takes either a bit register or a memory operand.
- // In either case, only gtType bits are read and so widening
- // or narrowing the operand may be unnecessary and it can
- // just be used directly.
-
- node->Op(2) = TryRemoveCastIfPresent(node->TypeGet(), node->Op(2));
- break;
- }
-
case NI_SSE2_CompareGreaterThan:
{
if (node->GetSimdBaseType() != TYP_DOUBLE)
@@ -3082,6 +3057,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm
comp->gtNewSimdCreateBroadcastNode(simdType, broadcastOp,
op1Intrinsic->GetSimdBaseJitType(), simdSize);
+ assert(vecCns->IsCnsVec());
BlockRange().InsertAfter(broadcastOp, vecCns);
nestedOp2 = vecCns;
@@ -4032,10 +4008,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
GenTree* tmp2 = nullptr;
GenTree* tmp3 = nullptr;
- bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal);
- bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar) ||
- (intrinsicId == NI_Vector512_CreateScalar);
- size_t argCnt = node->GetOperandCount();
+ bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal);
+ bool isCreateScalar = HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId);
+ size_t argCnt = node->GetOperandCount();
if (isConstant)
{
@@ -4046,8 +4021,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
#if !defined(TARGET_64BIT)
if (arg->OperIsLong())
{
- BlockRange().Remove(arg->AsOp()->gtGetOp1());
- BlockRange().Remove(arg->AsOp()->gtGetOp2());
+ BlockRange().Remove(arg->gtGetOp1());
+ BlockRange().Remove(arg->gtGetOp2());
}
#endif // !TARGET_64BIT
BlockRange().Remove(arg);
@@ -4075,165 +4050,61 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
{
if (isCreateScalar)
{
- node->gtType = TYP_SIMD16;
- node->SetSimdSize(16);
-
switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
- {
- // Types need to be explicitly zero-extended to ensure upper-bits are zero
- //
- // We need to explicitly use TYP_UBYTE since unsigned is ignored for small types
- // Explicitly handle both BYTE and UBYTE to account for reinterpret casts and the like
- //
- // The from type is INT since that is the input type tracked by IR, where-as the target
- // type needs to be UBYTE so it implicitly zero-extends back to TYP_INT
-
- tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_UBYTE);
- BlockRange().InsertAfter(op1, tmp1);
- LowerNode(tmp1);
-
- node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1);
- node->SetSimdBaseJitType(CORINFO_TYPE_INT);
- break;
- }
-
case TYP_SHORT:
case TYP_USHORT:
{
- // Types need to be explicitly zero-extended to ensure upper-bits are zero
+ // The smallest scalar SIMD load that zeroes upper elements is 32 bits, so for CreateScalar,
+ // we must ensure that the upper bits of that 32-bit value are zero if the base type is small.
//
- // We need to explicitly use TYP_USHORT since unsigned is ignored for small types
- // Explicitly handle both SHORT and USHORT to account for reinterpret casts and the like
+ // The most likely case is that op1 is a cast from int/long to the base type:
+ // * CAST int <- short <- int/long
+ // If the base type is signed, that cast will be sign-extending, but we need zero extension,
+ // so we can simply retype the cast to the unsigned type of the same size.
//
- // The from type is INT since that is the input type tracked by IR, where-as the target
- // type needs to be USHORT so it implicitly zero-extends back to TYP_INT
-
- tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_USHORT);
- BlockRange().InsertAfter(op1, tmp1);
- LowerNode(tmp1);
+ // It's also possible we have a memory load of the base type:
+ // * IND short
+ // We can likewise change the type of the indir to force zero extension on load.
+ //
+ // If we can't safely retype one of the above patterns and don't already have a cast to the
+ // correct unsigned type, we will insert our own cast.
- node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1);
node->SetSimdBaseJitType(CORINFO_TYPE_INT);
- break;
- }
-
- case TYP_INT:
- {
- node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32);
- break;
- }
-
- case TYP_UINT:
- {
- node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128UInt32);
- break;
- }
-#if defined(TARGET_AMD64)
- case TYP_LONG:
- {
- node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128Int64);
- break;
- }
-
- case TYP_ULONG:
- {
- node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128UInt64);
- break;
- }
-#endif // TARGET_AMD64
+ var_types unsignedType = varTypeToUnsigned(simdBaseType);
- case TYP_FLOAT:
- {
- tmp1 = comp->gtNewZeroConNode(simdType);
- BlockRange().InsertBefore(op1, tmp1);
- LowerNode(tmp1);
-
- if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ if (op1->OperIs(GT_CAST) && !op1->gtOverflow())
{
- // Sse41.Insert has:
- // * Bits 0-3: zmask
- // * Bits 4-5: count_d
- // * Bits 6-7: count_s (register form only)
- //
- // We want zmask 0b1110 (0xE) to zero elements 1/2/3
- // We want count_d 0b00 (0x0) to insert the value to element 0
- // We want count_s 0b00 (0x0) as we're just taking element 0 of the source
-
- idx = comp->gtNewIconNode(0x0E);
- BlockRange().InsertAfter(op1, idx);
- LowerNode(idx);
-
- node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, op1, idx);
+ assert(op1->TypeIs(TYP_INT) && (genTypeSize(op1->CastToType()) == genTypeSize(simdBaseType)));
+ op1->AsCast()->gtCastType = unsignedType;
}
- else
+ else if (op1->OperIs(GT_IND, GT_LCL_FLD))
{
- node->ResetHWIntrinsicId(NI_SSE_MoveScalar, comp, tmp1, op1);
+ assert(genTypeSize(op1) == genTypeSize(simdBaseType));
+ op1->gtType = unsignedType;
+ }
+ else if (!op1->OperIs(GT_CAST) || (op1->AsCast()->CastToType() != unsignedType))
+ {
+ tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* fromUnsigned */ false, unsignedType);
+ node->Op(1) = tmp1;
+ BlockRange().InsertAfter(op1, tmp1);
+ LowerNode(tmp1);
}
- break;
- }
-
- case TYP_DOUBLE:
- {
- tmp1 = comp->gtNewZeroConNode(simdType);
- BlockRange().InsertBefore(op1, tmp1);
- LowerNode(tmp1);
- node->ResetHWIntrinsicId(NI_SSE2_MoveScalar, comp, tmp1, op1);
break;
}
default:
{
- unreached();
- }
- }
-
- if (simdSize > 16)
- {
- assert((simdSize == 32) || (simdSize == 64));
-
- // We're creating a Vector256/512 scalar so we need to treat the original op as Vector128,
- // we need to unsafely extend up to Vector256/512 (which is actually safe since the 128-bit
- // op will zero extend up to 256/512-bits), and then we need to replace the original use
- // with the new TYP_SIMD32/64 node.
-
- node->ChangeType(TYP_SIMD16);
- node->SetSimdSize(16);
- LowerNode(node);
-
- LIR::Use use;
- bool foundUse = BlockRange().TryGetUse(node, &use);
-
- tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node, NI_Vector128_ToVector256Unsafe, simdBaseJitType,
- 16);
- BlockRange().InsertAfter(node, tmp2);
-
- if (simdSize == 64)
- {
- tmp3 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD64, tmp2, NI_Vector256_ToVector512Unsafe,
- simdBaseJitType, 32);
- BlockRange().InsertAfter(tmp2, tmp3);
- tmp2 = tmp3;
- }
-
- if (foundUse)
- {
- use.ReplaceWith(tmp2);
- }
- else
- {
- node->ClearUnusedValue();
- tmp2->SetUnusedValue();
+ break;
}
-
- node = tmp2->AsHWIntrinsic();
}
- return LowerNode(node);
+ ContainCheckHWIntrinsic(node);
+ return node->gtNext;
}
// We have the following (where simd is simd16, simd32 or simd64):
@@ -4509,40 +4380,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
break;
}
-#if defined(TARGET_AMD64)
- case TYP_LONG:
- case TYP_ULONG:
- {
- // We will be constructing the following parts:
- // ...
- // /--* tmp1 simd16
- // * STORE_LCL_VAR simd16
- // tmp1 = LCL_VAR simd16
- // tmp2 = LCL_VAR simd16
- // /--* tmp1 simd16
- // +--* tmp2 simd16
- // node = * HWINTRINSIC simd16 ulong UnpackLow
-
- // This is roughly the following managed code:
- // ...
- // var tmp2 = tmp1;
- // return Sse2.UnpackLow(tmp1, tmp2);
-
- assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2));
-
- node->Op(1) = tmp1;
- LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
- ReplaceWithLclVar(tmp1Use);
- tmp1 = node->Op(1);
-
- tmp2 = comp->gtClone(tmp1);
- BlockRange().InsertAfter(tmp1, tmp2);
-
- node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2);
- break;
- }
-#endif // TARGET_AMD64
-
case TYP_FLOAT:
{
if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX))
@@ -4599,9 +4436,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
break;
}
+ case TYP_LONG:
+ case TYP_ULONG:
case TYP_DOUBLE:
{
- if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE3))
+ if ((IsContainableMemoryOp(op1) || simdBaseType == TYP_DOUBLE) &&
+ comp->compOpportunisticallyDependsOn(InstructionSet_SSE3))
{
// We will be constructing the following parts:
// ...
@@ -4613,6 +4453,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
// return Sse3.MoveAndDuplicate(tmp1);
node->ChangeHWIntrinsicId(NI_SSE3_MoveAndDuplicate, tmp1);
+ node->SetSimdBaseJitType(CORINFO_TYPE_DOUBLE);
break;
}
@@ -4626,12 +4467,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
// tmp2 = LCL_VAR simd16
// /--* tmp1 simd16
// +--* tmp2 simd16
- // node = * HWINTRINSIC simd16 float MoveLowToHigh
+ // node = * HWINTRINSIC simd16 T UnpackLow
// This is roughly the following managed code:
// ...
// var tmp2 = tmp1;
- // return Sse.MoveLowToHigh(tmp1, tmp2);
+ // return Sse2.UnpackLow(tmp1, tmp2);
node->Op(1) = tmp1;
LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
@@ -4641,8 +4482,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
tmp2 = comp->gtClone(tmp1);
BlockRange().InsertAfter(tmp1, tmp2);
- node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2);
- node->SetSimdBaseJitType(CORINFO_TYPE_FLOAT);
+ node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2);
break;
}
@@ -4655,19 +4495,16 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
return LowerNode(node);
}
- GenTree* op2 = node->Op(2);
-
- // TODO-XArch-AVX512 : Merge the NI_Vector512_Create and NI_Vector256_Create paths below.
- // We have the following (where simd is simd16 or simd32):
- // /--* op1 T
- // +--* ... T
- // +--* opN T
- // node = * HWINTRINSIC simd T Create
- if (intrinsicId == NI_Vector512_Create)
+ if (intrinsicId == NI_Vector512_Create || intrinsicId == NI_Vector256_Create)
{
- assert(comp->IsBaselineVector512IsaSupportedDebugOnly());
+ assert(argCnt >= (simdSize / genTypeSize(TYP_LONG)));
+ assert(((simdSize == 64) && comp->IsBaselineVector512IsaSupportedDebugOnly()) ||
+ ((simdSize == 32) && comp->IsBaselineVector256IsaSupportedDebugOnly()));
- // We will be constructing the following parts:
+ // The larger vector implementation is simplified by splitting the
+ // job in half and delegating to the next smaller vector size.
+ //
+ // For example, for Vector512, we construct the following:
// /--* op1 T
// +--* ... T
// lo = * HWINTRINSIC simd32 T Create
@@ -4697,86 +4534,35 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
// lo = Vector256.Create(op1, ..., op16);
// hi = Vector256.Create(op17, ..., op32);
+ var_types halfType = comp->getSIMDTypeForSize(simdSize / 2);
+ NamedIntrinsic halfCreate = (simdSize == 64) ? NI_Vector256_Create : NI_Vector128_Create;
+ NamedIntrinsic withUpper = (simdSize == 64) ? NI_Vector512_WithUpper : NI_Vector256_WithUpper;
+
size_t halfArgCnt = argCnt / 2;
assert((halfArgCnt * 2) == argCnt);
GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt);
-
- GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(), halfArgCnt,
- NI_Vector256_Create, simdBaseJitType, 32);
- BlockRange().InsertAfter(loInsertionPoint, lo);
-
GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt);
- GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(halfArgCnt), halfArgCnt,
- NI_Vector256_Create, simdBaseJitType, 32);
- BlockRange().InsertAfter(hiInsertionPoint, hi);
-
- assert(argCnt >= 7);
- node->ResetHWIntrinsicId(NI_Vector512_WithUpper, comp, lo, hi);
-
- LowerNode(lo);
- LowerNode(hi);
-
- return LowerNode(node);
- }
- else if (intrinsicId == NI_Vector256_Create)
- {
- assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX));
- // We will be constructing the following parts:
- // /--* op1 T
- // +--* ... T
- // lo = * HWINTRINSIC simd16 T Create
- // /--* ... T
- // +--* opN T
- // hi = * HWINTRINSIC simd16 T Create
- // /--* lo simd32
- // +--* hi simd16
- // node = * HWINTRINSIC simd32 T WithUpper
-
- // This is roughly the following managed code:
- // ...
- // var lo = Vector128.Create(op1, ...);
- // var hi = Vector128.Create(..., opN);
- // return lo.WithUpper(hi);
-
- // Each Vector128.Create call gets half the operands. That is:
- // lo = Vector128.Create(op1, op2);
- // hi = Vector128.Create(op3, op4);
- // -or-
- // lo = Vector128.Create(op1, ..., op4);
- // hi = Vector128.Create(op5, ..., op8);
- // -or-
- // lo = Vector128.Create(op1, ..., op8);
- // hi = Vector128.Create(op9, ..., op16);
- // -or-
- // lo = Vector128.Create(op1, ..., op16);
- // hi = Vector128.Create(op17, ..., op32);
+ GenTree* lo = comp->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(), halfArgCnt, halfCreate,
+ simdBaseJitType, simdSize / 2);
- size_t halfArgCnt = argCnt / 2;
- assert((halfArgCnt * 2) == argCnt);
+ GenTree* hi = comp->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(halfArgCnt), halfArgCnt,
+ halfCreate, simdBaseJitType, simdSize / 2);
- GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt);
+ node->ResetHWIntrinsicId(withUpper, comp, lo, hi);
- GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt,
- NI_Vector128_Create, simdBaseJitType, 16);
BlockRange().InsertAfter(loInsertionPoint, lo);
-
- GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt);
-
- GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt,
- NI_Vector128_Create, simdBaseJitType, 16);
BlockRange().InsertAfter(hiInsertionPoint, hi);
- assert(argCnt >= 3);
- node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, lo, hi);
-
LowerNode(lo);
LowerNode(hi);
return LowerNode(node);
}
+ assert(intrinsicId == NI_Vector128_Create);
+
// We will be constructing the following parts:
// /--* op1 T
// tmp1 = * HWINTRINSIC simd16 T CreateScalarUnsafe
@@ -4975,54 +4761,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
break;
}
-#if defined(TARGET_AMD64)
- case TYP_LONG:
- case TYP_ULONG:
- {
- if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64))
- {
- // We will be constructing the following parts:
- // ...
- // idx = CNS_INT int 1
- // /--* tmp1 simd16
- // +--* op2 T
- // +--* idx int
- // node = * HWINTRINSIC simd16 T Insert
-
- // This is roughly the following managed code:
- // ...
- // return Sse41.X64.Insert(tmp1, op2, 0x01);
-
- idx = comp->gtNewIconNode(0x01, TYP_INT);
- BlockRange().InsertBefore(node, idx);
-
- node->ResetHWIntrinsicId(NI_SSE41_X64_Insert, comp, tmp1, op2, idx);
- break;
- }
-
- // We will be constructing the following parts:
- // ...
- // /--* op2 T
- // tmp2 = * HWINTRINSIC simd16 T CreateScalarUnsafe
- // /--* tmp1 simd16
- // +--* tmp2 simd16
- // node = * HWINTRINSIC simd16 T UnpackLow
-
- // This is roughly the following managed code:
- // ...
- // var tmp2 = Vector128.CreateScalarUnsafe(op2);
- // return Sse2.UnpackLow(tmp1, tmp2);
-
- assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2));
-
- tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16);
- LowerNode(tmp2);
-
- node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2);
- break;
- }
-#endif // TARGET_AMD64
-
case TYP_FLOAT:
{
unsigned N = 0;
@@ -5162,28 +4900,52 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
break;
}
+ case TYP_LONG:
+ case TYP_ULONG:
case TYP_DOUBLE:
{
+ GenTree* op2 = node->Op(2);
+
+ if (varTypeIsLong(simdBaseType) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64))
+ {
+ // We will be constructing the following parts:
+ // ...
+ // idx = CNS_INT int 1
+ // /--* tmp1 simd16
+ // +--* op2 T
+ // +--* idx int
+ // node = * HWINTRINSIC simd16 T Insert
+
+ // This is roughly the following managed code:
+ // ...
+ // return Sse41.X64.Insert(tmp1, op2, 0x01);
+
+ idx = comp->gtNewIconNode(0x01, TYP_INT);
+ BlockRange().InsertBefore(node, idx);
+
+ node->ResetHWIntrinsicId(NI_SSE41_X64_Insert, comp, tmp1, op2, idx);
+ break;
+ }
+
// We will be constructing the following parts:
// ...
// /--* op2 T
// tmp2 = * HWINTRINSIC simd16 T CreateScalarUnsafe
// /--* tmp1 simd16
// +--* tmp2 simd16
- // node = * HWINTRINSIC simd16 T MoveLowToHigh
+ // node = * HWINTRINSIC simd16 T UnpackLow
// This is roughly the following managed code:
// ...
// var tmp2 = Vector128.CreateScalarUnsafe(op2);
- // return Sse.MoveLowToHigh(tmp1, tmp2);
+ // return Sse.UnpackLow(tmp1, tmp2);
assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2));
tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16);
LowerNode(tmp2);
- node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2);
- node->SetSimdBaseJitType(CORINFO_TYPE_FLOAT);
+ node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2);
break;
}
@@ -5210,9 +4972,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();
- assert((intrinsicId == NI_Vector128_GetElement) || (intrinsicId == NI_Vector256_GetElement) ||
- (intrinsicId == NI_Vector512_GetElement));
-
+ assert(HWIntrinsicInfo::IsVectorGetElement(intrinsicId));
assert(!varTypeIsSIMD(simdType));
assert(varTypeIsArithmetic(simdBaseType));
assert(simdSize != 0);
@@ -6835,24 +6595,25 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
unsigned simdSize = node->GetSimdSize();
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
- assert((intrinsicId == NI_Vector128_ToScalar) || (intrinsicId == NI_Vector256_ToScalar) ||
- (intrinsicId == NI_Vector512_ToScalar));
-
+ assert(HWIntrinsicInfo::IsVectorToScalar(intrinsicId));
assert(varTypeIsSIMD(simdType));
assert(varTypeIsArithmetic(simdBaseType));
assert(simdSize != 0);
GenTree* op1 = node->Op(1);
- if (IsContainableMemoryOp(op1))
+ if (IsContainableMemoryOp(op1) && (!varTypeIsLong(simdBaseType) || TargetArchitecture::Is64Bit))
{
- // We will specially handle ToScalar when op1 is already in memory
+ // If op1 is already in memory, we'd like the consumer of ToScalar to be able to look
+ // through to the memory directly. Early folding is preferable, as it unlocks additional
+ // containment opportunities for the consuming nodes. If we can't fold away ToScalar,
+ // we will still contain op1 if possible, and let codegen try to peek through to it.
+ //
+ // However, we specifically need to avoid doing this for long on 32-bit because we are
+ // already past DecomposeLongs, and codegen wouldn't be able to handle it.
if (op1->OperIs(GT_IND))
{
- // We want to optimize ToScalar down to an Indir where possible as
- // this unlocks additional containment opportunities for various nodes
-
GenTreeIndir* indir = op1->AsIndir();
GenTreeIndir* newIndir =
@@ -6879,9 +6640,6 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
{
uint32_t elemSize = genTypeSize(simdBaseType);
- // We want to optimize ToScalar down to a LclFld where possible as
- // this unlocks additional containment opportunities for various nodes
-
GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
uint32_t lclOffs = lclVar->GetLclOffs() + (0 * elemSize);
LclVarDsc* lclDsc = comp->lvaGetDesc(lclVar);
@@ -6908,92 +6666,10 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
return LowerNode(lclFld);
}
}
-
- if (IsSafeToContainMem(node, op1))
- {
- // Handle other cases in codegen
- ContainCheckHWIntrinsic(node);
- return node->gtNext;
- }
}
- switch (simdBaseType)
- {
- case TYP_BYTE:
- case TYP_SHORT:
- case TYP_INT:
- {
- node->gtType = TYP_INT;
- node->SetSimdBaseJitType(CORINFO_TYPE_INT);
- node->ChangeHWIntrinsicId(NI_SSE2_ConvertToInt32);
- break;
- }
-
- case TYP_UBYTE:
- case TYP_USHORT:
- case TYP_UINT:
- {
- node->gtType = TYP_INT;
- node->SetSimdBaseJitType(CORINFO_TYPE_UINT);
- node->ChangeHWIntrinsicId(NI_SSE2_ConvertToUInt32);
- break;
- }
-
-#if defined(TARGET_AMD64)
- case TYP_LONG:
- {
- node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertToInt64);
- break;
- }
-
- case TYP_ULONG:
- {
- node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertToUInt64);
- break;
- }
-#endif // TARGET_AMD64
-
- case TYP_FLOAT:
- case TYP_DOUBLE:
- {
- ContainCheckHWIntrinsic(node);
- return node->gtNext;
- }
-
- default:
- {
- unreached();
- }
- }
-
- GenTree* next = LowerNode(node);
-
- if (genTypeSize(simdBaseType) < 4)
- {
- // The move intrinsics do not touch the upper bits, so we need an explicit
- // cast to ensure the result is properly sign extended
-
- LIR::Use use;
-
- bool foundUse = BlockRange().TryGetUse(node, &use);
- bool fromUnsigned = varTypeIsUnsigned(simdBaseType);
-
- GenTreeCast* cast = comp->gtNewCastNode(TYP_INT, node, fromUnsigned, simdBaseType);
- BlockRange().InsertAfter(node, cast);
-
- if (foundUse)
- {
- use.ReplaceWith(cast);
- }
- else
- {
- node->ClearUnusedValue();
- cast->SetUnusedValue();
- }
- next = LowerNode(cast);
- }
-
- return next;
+ ContainCheckHWIntrinsic(node);
+ return node->gtNext;
}
//----------------------------------------------------------------------------------------------
@@ -8016,20 +7692,38 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node)
case NI_Vector256_ToScalar:
case NI_Vector512_ToScalar:
{
- if (varTypeIsFloating(simdBaseType))
+ // These intrinsics are "ins reg/mem, xmm" or "ins xmm, reg/mem"
+ //
+ // In the case we are coming from and going to memory, we want to
+ // preserve the original containment as we'll end up emitting a pair
+ // of scalar moves. e.g. for float:
+ // movss xmm0, [addr1] ; Size: 4, Latency: 4-7, TP: 0.5
+ // movss [addr2], xmm0 ; Size: 4, Latency: 4-10, TP: 1
+ //
+ // However, we want to prefer containing the store over allowing the
+ // input to be regOptional, so track and clear containment if required.
+
+ clearContainedNode = hwintrinsic->Op(1);
+ isContainable = !clearContainedNode->isContained();
+
+ if (isContainable && varTypeIsIntegral(simdBaseType))
{
- // These intrinsics are "ins reg/mem, xmm" or "ins xmm, reg/mem"
- //
- // In the case we are coming from and going to memory, we want to
- // preserve the original containment as we'll end up emitting:
- // movss xmm0, [addr1] ; Size: 4, Latency: 4-7, TP: 0.5
- // movss [addr2], xmm0 ; Size: 4, Latency: 4-10, TP: 1
- //
- // However, we want to prefer containing the store over allowing the
- // input to be regOptional, so track and clear containment if required.
+ isContainable = (genTypeSize(simdBaseType) == genTypeSize(node)) &&
+ (!varTypeIsSmall(simdBaseType) ||
+ comp->compOpportunisticallyDependsOn(InstructionSet_SSE41));
+
+ if (isContainable && varTypeIsSmall(simdBaseType))
+ {
+ CorInfoType baseJitType = varTypeIsByte(node) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_USHORT;
+ intrinsicId = varTypeIsByte(node) ? NI_SSE41_Extract : NI_SSE2_Extract;
+
+ GenTree* zero = comp->gtNewZeroConNode(TYP_INT);
+ BlockRange().InsertBefore(hwintrinsic, zero);
- clearContainedNode = hwintrinsic->Op(1);
- isContainable = !clearContainedNode->isContained();
+ hwintrinsic->SetSimdBaseJitType(baseJitType);
+ hwintrinsic->ResetHWIntrinsicId(intrinsicId, hwintrinsic->Op(1), zero);
+ zero->SetContained();
+ }
}
break;
}
@@ -8453,21 +8147,40 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
void Lowering::ContainCheckShiftRotate(GenTreeOp* node)
{
assert(node->OperIsShiftOrRotate());
+
+ GenTree* source = node->gtOp1;
+ GenTree* shiftBy = node->gtOp2;
+
#ifdef TARGET_X86
- GenTree* source = node->gtOp1;
if (node->OperIsShiftLong())
{
- assert(source->OperGet() == GT_LONG);
+ assert(source->OperIs(GT_LONG));
MakeSrcContained(node, source);
}
-#endif
+#endif // TARGET_X86
- GenTree* shiftBy = node->gtOp2;
if (IsContainableImmed(node, shiftBy) && (shiftBy->AsIntConCommon()->IconValue() <= 255) &&
(shiftBy->AsIntConCommon()->IconValue() >= 0))
{
MakeSrcContained(node, shiftBy);
}
+
+ bool canContainSource = !source->isContained() && (genTypeSize(source) >= genTypeSize(node));
+
+ // BMI2 rotate and shift instructions take memory operands but do not set flags.
+ // rorx takes imm8 for the rotate amount; shlx/shrx/sarx take r32/64 for shift amount.
+ if (canContainSource && !node->gtSetFlags() && (shiftBy->isContained() != node->OperIsShift()) &&
+ comp->compOpportunisticallyDependsOn(InstructionSet_BMI2))
+ {
+ if (IsContainableMemoryOp(source) && IsSafeToContainMem(node, source))
+ {
+ MakeSrcContained(node, source);
+ }
+ else if (IsSafeToMarkRegOptional(node, source))
+ {
+ MakeSrcRegOptional(node, source);
+ }
+ }
}
//------------------------------------------------------------------------
@@ -9152,6 +8865,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX10v1_ShiftRightArithmetic:
{
assert((tupleType & INS_TT_MEM128) != 0);
+ tupleType = static_cast(tupleType & ~INS_TT_MEM128);
// Shift amount (op2) can be either imm8 or vector. If vector, it will always be xmm/m128.
//
@@ -9163,12 +8877,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
expectedSize = genTypeSize(TYP_SIMD16);
break;
}
- else if ((expectedSize < genTypeSize(TYP_SIMD64)) && (ins != INS_vpsraq))
+ else if (!comp->canUseEvexEncoding())
{
- // TODO-XArch-CQ: This should really only be checking EVEX capability, however
- // emitter::TakesEvexPrefix doesn't currently handle requiring EVEX based on presence
- // of an immediate operand. For now we disable containment of op1 unless EVEX is
- // required for some other reason.
supportsMemoryOp = false;
break;
}
@@ -9214,7 +8924,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
default:
SIZE_FROM_TUPLE_TYPE:
{
- tupleType = static_cast(tupleType & ~INS_TT_MEM128);
switch (tupleType)
{
case INS_TT_NONE:
@@ -9295,6 +9004,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
switch (parentIntrinsicId)
{
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
@@ -9399,6 +9111,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
switch (intrinsicId)
{
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
@@ -9494,7 +9209,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
{
GenTreeHWIntrinsic* hwintrinsicOperand = broadcastOperand->AsHWIntrinsic();
- if (hwintrinsicOperand->OperIsCreateScalarUnsafe())
+ if (HWIntrinsicInfo::IsVectorCreateScalarUnsafe(hwintrinsicOperand->GetHWIntrinsicId()))
{
// CreateScalarUnsafe can contain non-memory operands such as enregistered
// locals, so we want to check if its operand is containable instead. This
@@ -9841,9 +9556,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
if ((simdSize == 8) || (simdSize == 12))
{
// We want to handle GetElement/ToScalar still for Vector2/3
- if ((intrinsicId != NI_Vector128_GetElement) && (intrinsicId != NI_Vector128_ToScalar) &&
- (intrinsicId != NI_Vector256_GetElement) && (intrinsicId != NI_Vector256_ToScalar) &&
- (intrinsicId != NI_Vector512_GetElement) && (intrinsicId != NI_Vector512_ToScalar))
+ if (!HWIntrinsicInfo::IsVectorToScalar(intrinsicId) && !HWIntrinsicInfo::IsVectorGetElement(intrinsicId))
{
// TODO-XArch-CQ: Ideally we would key this off of the size the containing node
// expects vs the size node actually is or would be if spilled to the stack
@@ -9952,7 +9665,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
{
GenTreeHWIntrinsic* childNode = op1->AsHWIntrinsic();
- if (childNode->OperIsCreateScalarUnsafe())
+ if (HWIntrinsicInfo::IsVectorCreateScalarUnsafe(childNode->GetHWIntrinsicId()))
{
// We have a very special case of BroadcastScalarToVector(CreateScalarUnsafe(op1))
//
@@ -9964,6 +9677,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
// op1 directly, we'll then special case the codegen to materialize the value into a
// SIMD register in the case it is marked optional and doesn't get spilled.
+ if (childNode->Op(1)->OperIsLong())
+ {
+ // Decomposed longs require special codegen
+ return;
+ }
+
node->Op(1) = childNode->Op(1);
BlockRange().Remove(op1);
@@ -10068,6 +9787,50 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
return;
}
+#ifdef TARGET_X86
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
+ case NI_Vector128_CreateScalarUnsafe:
+ case NI_Vector256_CreateScalarUnsafe:
+ case NI_Vector512_CreateScalarUnsafe:
+ {
+ if (op1->OperIsLong())
+ {
+ // Contain decomposed longs and handle them in codegen
+ assert(varTypeIsLong(simdBaseType));
+
+ for (GenTree* longOp : op1->Operands())
+ {
+ if (IsContainableMemoryOp(longOp) && IsSafeToContainMem(node, longOp))
+ {
+ MakeSrcContained(node, longOp);
+ }
+ else if (IsSafeToMarkRegOptional(node, longOp))
+ {
+ MakeSrcRegOptional(node, longOp);
+ }
+ }
+
+ MakeSrcContained(node, op1);
+ return;
+ }
+ break;
+ }
+
+ case NI_Vector128_ToScalar:
+ case NI_Vector256_ToScalar:
+ case NI_Vector512_ToScalar:
+ {
+ // These will be contained by a STOREIND
+ if (varTypeIsLong(simdBaseType))
+ {
+ return;
+ }
+ break;
+ }
+#endif
+
default:
{
break;
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index fe29ad9fd77d9b..1aa99206749e23 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -3856,9 +3856,36 @@ void LinearScan::processKills(RefPosition* killRefPosition)
RefPosition* nextKill = killRefPosition->nextRefPosition;
regMaskTP killedRegs = killRefPosition->getKilledRegisters();
- while (killedRegs.IsNonEmpty())
+
+ freeKilledRegs(killRefPosition, killedRegs.getLow(), nextKill, REG_LOW_BASE);
+
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ freeKilledRegs(killRefPosition, killedRegs.getHigh(), nextKill, REG_HIGH_BASE);
+#endif
+
+ regsBusyUntilKill &= ~killRefPosition->getKilledRegisters();
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE,
+ killRefPosition->getKilledRegisters()));
+}
+
+//------------------------------------------------------------------------
+// freeKilledRegs: Handle registers that are being killed.
+//
+// Arguments:
+// killRefPosition - The RefPosition for the kill
+// killedRegs - Registers to kill
+// nextKill - The RefPosition for next kill
+// regBase - `0` or `64` based on the `killedRegs` being processed
+//
+void LinearScan::freeKilledRegs(RefPosition* killRefPosition,
+ SingleTypeRegSet killedRegs,
+ RefPosition* nextKill,
+ int regBase)
+{
+
+ while (killedRegs != RBM_NONE)
{
- regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs);
+ regNumber killedReg = (regNumber)(genFirstRegNumFromMaskAndToggle(killedRegs) + regBase);
RegRecord* regRecord = getRegisterRecord(killedReg);
Interval* assignedInterval = regRecord->assignedInterval;
if (assignedInterval != nullptr)
@@ -3874,10 +3901,6 @@ void LinearScan::processKills(RefPosition* killRefPosition)
: regRecord->recentRefPosition->nextRefPosition;
updateNextFixedRef(regRecord, regNextRefPos, nextKill);
}
-
- regsBusyUntilKill &= ~killRefPosition->getKilledRegisters();
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE,
- killRefPosition->getKilledRegisters()));
}
//------------------------------------------------------------------------
@@ -4555,14 +4578,34 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
}
}
#else
+
regMaskTP deadCandidates = ~liveRegs;
// Only focus on actual registers present
deadCandidates &= actualRegistersMask;
+ handleDeadCandidates(deadCandidates.getLow(), REG_LOW_BASE, inVarToRegMap);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ handleDeadCandidates(deadCandidates.getHigh(), REG_HIGH_BASE, inVarToRegMap);
+#endif // HAS_MORE_THAN_64_REGISTERS
+#endif // TARGET_ARM
+}
- while (deadCandidates.IsNonEmpty())
+//------------------------------------------------------------------------
+// handleDeadCandidates: Handle registers that are assigned to local variables.
+//
+// Arguments:
+// deadCandidates - mask of registers.
+// regBase - base register number.
+// inVarToRegMap - variable to register map.
+//
+// Return Value:
+// None
+//
+void LinearScan::handleDeadCandidates(SingleTypeRegSet deadCandidates, int regBase, VarToRegMap inVarToRegMap)
+{
+ while (deadCandidates != RBM_NONE)
{
- regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates);
+ regNumber reg = (regNumber)(genFirstRegNumFromMaskAndToggle(deadCandidates) + regBase);
RegRecord* physRegRecord = getRegisterRecord(reg);
makeRegAvailable(reg, physRegRecord->registerType);
@@ -4592,7 +4635,6 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
}
}
}
-#endif // TARGET_ARM
}
//------------------------------------------------------------------------
@@ -4741,6 +4783,22 @@ void LinearScan::freeRegister(RegRecord* physRegRecord)
}
}
+//------------------------------------------------------------------------
+// LinearScan::freeRegisters: Free the registers in 'regsToFree'
+//
+// Arguments:
+// regsToFree - the mask of registers to free, separated into low and high parts.
+// regBase - `0` or `64` depending on if the registers to be freed are in the lower or higher bank.
+//
+void LinearScan::freeRegistersSingleType(SingleTypeRegSet regsToFree, int regBase)
+{
+ while (regsToFree != RBM_NONE)
+ {
+ regNumber nextReg = (regNumber)(genFirstRegNumFromMaskAndToggle(regsToFree) + regBase);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ freeRegister(regRecord);
+ }
+}
//------------------------------------------------------------------------
// LinearScan::freeRegisters: Free the registers in 'regsToFree'
//
@@ -4756,20 +4814,26 @@ void LinearScan::freeRegisters(regMaskTP regsToFree)
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
makeRegsAvailable(regsToFree);
+#ifdef TARGET_ARM
while (regsToFree.IsNonEmpty())
{
regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree);
RegRecord* regRecord = getRegisterRecord(nextReg);
-#ifdef TARGET_ARM
if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE))
{
assert(genIsValidDoubleReg(nextReg));
regsToFree.RemoveRegNumFromMask(regNumber(nextReg + 1));
}
-#endif
freeRegister(regRecord);
}
+#else
+ freeRegistersSingleType(regsToFree.getLow(), REG_LOW_BASE);
+#ifdef HAS_MORE_THAN_64_REGISTERS
+ freeRegistersSingleType(regsToFree.getHigh(), REG_HIGH_BASE);
+#endif
+
+#endif
}
//------------------------------------------------------------------------
@@ -8218,7 +8282,7 @@ void LinearScan::resolveRegisters()
// Determine initial position for parameters
- if (varDsc->lvIsParam)
+ if (varDsc->lvIsParam || varDsc->lvIsParamRegTarget)
{
SingleTypeRegSet initialRegMask = interval->firstRefPosition->registerAssignment;
regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index a00338006e865d..4b76b336be504c 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -1005,8 +1005,10 @@ class LinearScan : public LinearScanInterface
// Record variable locations at start/end of block
void processBlockStartLocations(BasicBlock* current);
- void processBlockEndLocations(BasicBlock* current);
- void resetAllRegistersState();
+
+ FORCEINLINE void handleDeadCandidates(SingleTypeRegSet deadCandidates, int regBase, VarToRegMap inVarToRegMap);
+ void processBlockEndLocations(BasicBlock* current);
+ void resetAllRegistersState();
#ifdef TARGET_ARM
bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
@@ -1079,9 +1081,10 @@ class LinearScan : public LinearScanInterface
SingleTypeRegSet lowSIMDRegs();
SingleTypeRegSet internalFloatRegCandidates();
- void makeRegisterInactive(RegRecord* physRegRecord);
- void freeRegister(RegRecord* physRegRecord);
- void freeRegisters(regMaskTP regsToFree);
+ void makeRegisterInactive(RegRecord* physRegRecord);
+ void freeRegister(RegRecord* physRegRecord);
+ void freeRegisters(regMaskTP regsToFree);
+ FORCEINLINE void freeRegistersSingleType(SingleTypeRegSet regsToFree, int regBase);
// Get the type that this tree defines.
var_types getDefType(GenTree* tree)
@@ -1192,8 +1195,12 @@ class LinearScan : public LinearScanInterface
void setIntervalAsSplit(Interval* interval);
void spillInterval(Interval* interval, RefPosition* fromRefPosition DEBUGARG(RefPosition* toRefPosition));
- void processKills(RefPosition* killRefPosition);
- void spillGCRefs(RefPosition* killRefPosition);
+ void processKills(RefPosition* killRefPosition);
+ FORCEINLINE void freeKilledRegs(RefPosition* killRefPosition,
+ SingleTypeRegSet killedRegs,
+ RefPosition* nextKill,
+ int regBase);
+ void spillGCRefs(RefPosition* killRefPosition);
/*****************************************************************************
* Register selection
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 93d78e26d2d6bf..3ef6952fb8aec5 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -1105,19 +1105,17 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
}
#endif
}
-#if defined(TARGET_64BIT)
- else if (tree->OperIsShift() && !tree->isContained() &&
+ else if (!tree->isContained() && (tree->OperIsShift() || source->isContained()) &&
compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2))
{
- // shlx (as opposed to mov+shl) instructions handles all register forms, but it does not handle contained form
- // for memory operand. Likewise for sarx and shrx.
+ // We don't have any specific register requirements here, so skip the logic that
+ // reserves RCX or preferences the source reg.
// ToDo-APX : Remove when extended EVEX support is available
srcCount += BuildOperandUses(source, BuildApxIncompatibleGPRMask(source, srcCandidates));
srcCount += BuildOperandUses(shiftBy, BuildApxIncompatibleGPRMask(shiftBy, dstCandidates));
BuildDef(tree, BuildApxIncompatibleGPRMask(tree, dstCandidates, true));
return srcCount;
}
-#endif
else
{
// This ends up being BMI
@@ -1141,9 +1139,9 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
#ifdef TARGET_X86
// The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
// we can have a three operand form.
- if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ if (tree->OperIs(GT_LSH_HI) || tree->OperIs(GT_RSH_LO))
{
- assert((source->OperGet() == GT_LONG) && source->isContained());
+ assert(source->OperIs(GT_LONG) && source->isContained());
GenTree* sourceLo = source->gtGetOp1();
GenTree* sourceHi = source->gtGetOp2();
@@ -1153,7 +1151,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
if (!tree->isContained())
{
- if (tree->OperGet() == GT_LSH_HI)
+ if (tree->OperIs(GT_LSH_HI))
{
setDelayFree(sourceLoUse);
}
@@ -1174,6 +1172,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
{
srcCount += BuildOperandUses(source, srcCandidates);
}
+
if (!tree->isContained())
{
if (!shiftBy->isContained())
@@ -2046,39 +2045,47 @@ int LinearScan::BuildIntrinsic(GenTree* tree)
#ifdef FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
-// SkipContainedCreateScalarUnsafe: Skips a contained CreateScalarUnsafe node
+// SkipContainedUnaryOp: Skips a contained non-memory or const node
// and gets the underlying op1 instead
//
// Arguments:
// node - The node to handle
//
// Return Value:
-// If node is a contained CreateScalarUnsafe, it's op1 is returned;
+// If node is a contained non-memory or const unary op, its op1 is returned;
// otherwise node is returned unchanged.
-static GenTree* SkipContainedCreateScalarUnsafe(GenTree* node)
+static GenTree* SkipContainedUnaryOp(GenTree* node)
{
- if (!node->OperIsHWIntrinsic() || !node->isContained())
+ if (!node->isContained())
{
return node;
}
- GenTreeHWIntrinsic* hwintrinsic = node->AsHWIntrinsic();
- NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();
-
- switch (intrinsicId)
+ if (node->OperIsHWIntrinsic())
{
- case NI_Vector128_CreateScalarUnsafe:
- case NI_Vector256_CreateScalarUnsafe:
- case NI_Vector512_CreateScalarUnsafe:
- {
- return hwintrinsic->Op(1);
- }
+ GenTreeHWIntrinsic* hwintrinsic = node->AsHWIntrinsic();
+ NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();
- default:
+ switch (intrinsicId)
{
- return node;
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
+ case NI_Vector128_CreateScalarUnsafe:
+ case NI_Vector256_CreateScalarUnsafe:
+ case NI_Vector512_CreateScalarUnsafe:
+ {
+ return hwintrinsic->Op(1);
+ }
+
+ default:
+ {
+ break;
+ }
}
}
+
+ return node;
}
//------------------------------------------------------------------------
@@ -2135,8 +2142,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
else
{
- // A contained CreateScalarUnsafe is special in that we're not containing it to load from
- // memory and it isn't a constant. Instead, its essentially a "transparent" node we're ignoring
+ // In a few cases, we contain an operand that isn't a load from memory or a constant. Instead,
+ // it is essentially a "transparent" node we're ignoring or handling specially in codegen
// to simplify the overall IR handling. As such, we need to "skip" such nodes when present and
// get the underlying op1 so that delayFreeUse and other preferencing remains correct.
@@ -2145,37 +2152,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
GenTree* op3 = nullptr;
GenTree* op4 = nullptr;
GenTree* op5 = nullptr;
- GenTree* lastOp = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(numArgs));
+ GenTree* lastOp = SkipContainedUnaryOp(intrinsicTree->Op(numArgs));
switch (numArgs)
{
case 5:
{
- op5 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(5));
+ op5 = SkipContainedUnaryOp(intrinsicTree->Op(5));
FALLTHROUGH;
}
case 4:
{
- op4 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(4));
+ op4 = SkipContainedUnaryOp(intrinsicTree->Op(4));
FALLTHROUGH;
}
case 3:
{
- op3 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(3));
+ op3 = SkipContainedUnaryOp(intrinsicTree->Op(3));
FALLTHROUGH;
}
case 2:
{
- op2 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(2));
+ op2 = SkipContainedUnaryOp(intrinsicTree->Op(2));
FALLTHROUGH;
}
case 1:
{
- op1 = SkipContainedCreateScalarUnsafe(intrinsicTree->Op(1));
+ op1 = SkipContainedUnaryOp(intrinsicTree->Op(1));
break;
}
@@ -2224,11 +2231,14 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
// must be handled within the case.
switch (intrinsicId)
{
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
+ case NI_Vector512_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
- case NI_Vector128_ToScalar:
case NI_Vector256_CreateScalarUnsafe:
- case NI_Vector256_ToScalar:
case NI_Vector512_CreateScalarUnsafe:
+ case NI_Vector128_ToScalar:
+ case NI_Vector256_ToScalar:
case NI_Vector512_ToScalar:
{
assert(numArgs == 1);
@@ -2242,17 +2252,38 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
else
{
- // We will either be in memory and need to be moved
- // into a register of the appropriate size or we
- // are already in an XMM/YMM/ZMM register and can stay
- // where we are.
+ // CreateScalarUnsafe and ToScalar are essentially no-ops for floating point types and can reuse
+ // the op1 register. CreateScalar needs to clear the upper elements, so if we have a float and
+ // can't use insertps to zero the upper elements in-place, we'll need a different target reg.
- tgtPrefUse = BuildUse(op1);
+ RefPosition* op1Use = BuildUse(op1);
srcCount += 1;
+
+ if ((baseType == TYP_FLOAT) && HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) &&
+ !compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ setDelayFree(op1Use);
+ }
+ else
+ {
+ tgtPrefUse = op1Use;
+ }
}
buildUses = false;
}
+#if TARGET_X86
+ else if (varTypeIsByte(baseType) && HWIntrinsicInfo::IsVectorToScalar(intrinsicId))
+ {
+ dstCandidates = allByteRegs();
+ }
+ else if (varTypeIsLong(baseType) && !compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ // For SSE2 fallbacks, we will need a temp register to insert the upper half of a long
+ buildInternalFloatRegisterDefForNode(intrinsicTree);
+ setInternalRegsDelayFree = true;
+ }
+#endif // TARGET_X86
break;
}
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 43390fd874418a..53b9bc8166d44b 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -2287,6 +2287,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg)
#else
*use = fieldList->SoleFieldOrThis();
#endif
+ *use = fgMorphTree(*use);
}
else
{
@@ -2335,7 +2336,8 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg)
// Try to see if we can use the promoted fields to pass this argument.
//
- if (varDsc->lvPromoted && (varDsc->lvFieldCnt == arg->AbiInfo.CountRegsAndStackSlots()))
+ if (varDsc->lvPromoted && !varDsc->lvDoNotEnregister &&
+ (varDsc->lvFieldCnt == arg->AbiInfo.CountRegsAndStackSlots()))
{
bool fieldsMatch = true;
@@ -2366,6 +2368,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg)
if (fieldsMatch)
{
newArg = fgMorphLclToFieldList(lclNode)->SoleFieldOrThis();
+ newArg = fgMorphTree(newArg);
}
}
}
@@ -2511,7 +2514,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg)
lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
}
}
- result->SetMorphed(this);
+ result = fgMorphTree(result);
return result;
}
else
@@ -2532,7 +2535,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg)
}
GenTree* indir = gtNewIndir(type, addr);
- indir->SetMorphed(this, /* doChildren*/ true);
+ indir->SetMorphed(this, /* doChildren */ true);
return indir;
}
};
@@ -2593,16 +2596,15 @@ GenTreeFieldList* Compiler::fgMorphLclToFieldList(GenTreeLclVar* lcl)
unsigned fieldLclNum = varDsc->lvFieldLclStart;
GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
- fieldList->SetMorphed(this);
for (unsigned i = 0; i < fieldCount; i++)
{
LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum);
GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet());
- lclVar->SetMorphed(this);
fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet());
fieldLclNum++;
}
+
return fieldList;
}
@@ -4684,8 +4686,13 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
// fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
// a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
// generic type parameters of both caller and callee generic method are the same.
- if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
- !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
+ //
+ // For OSR, we prefer to tailcall for call counting + potential transition
+ // into the actual tier1 version.
+ //
+ if (opts.compTailCallLoopOpt && canFastTailCall && !opts.IsOSR() && gtIsRecursiveCall(call) &&
+ !lvaReportParamTypeArg() && !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam &&
+ !varTypeIsStruct(call->TypeGet()))
{
fastTailCallToLoop = true;
}
@@ -6159,23 +6166,12 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
// Remove the call
fgRemoveStmt(block, lastStmt);
+ assert(!opts.IsOSR());
// Set the loop edge.
- BasicBlock* entryBB;
- if (opts.IsOSR())
- {
- // Todo: this may not look like a viable loop header.
- // Might need the moral equivalent of an init BB.
- entryBB = fgEntryBB;
- }
- else
- {
- assert(doesMethodHaveRecursiveTailcall());
-
- // TODO-Cleanup: We should really be expanding tailcalls into loops
- // much earlier than this, at a place where we do not need to have
- // hacky workarounds to figure out what the actual IL entry block is.
- entryBB = fgGetFirstILBlock();
- }
+ // TODO-Cleanup: We should really be expanding tailcalls into loops much
+ // earlier than this, at a place where we can just use the init BB here.
+ BasicBlock* entryBB = fgGetFirstILBlock();
+ assert(doesMethodHaveRecursiveTailcall());
FlowEdge* const newEdge = fgAddRefPred(entryBB, block);
block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
@@ -8366,7 +8362,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
GenTree*& retVal = tree->AsOp()->ReturnValueRef();
if ((retVal != nullptr) && ((genReturnBB == nullptr) || (compCurBB == genReturnBB)))
{
- fgTryReplaceStructLocalWithFields(&retVal);
+ if (fgTryReplaceStructLocalWithFields(&retVal))
+ {
+ retVal = fgMorphTree(retVal);
+ }
}
break;
}
@@ -8424,19 +8423,22 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
// Notes:
// Currently only called when the tree parent is a GT_RETURN/GT_SWIFT_ERROR_RET.
//
-void Compiler::fgTryReplaceStructLocalWithFields(GenTree** use)
+bool Compiler::fgTryReplaceStructLocalWithFields(GenTree** use)
{
if (!(*use)->OperIs(GT_LCL_VAR))
{
- return;
+ return false;
}
LclVarDsc* varDsc = lvaGetDesc((*use)->AsLclVar());
- if (!varDsc->lvDoNotEnregister && varDsc->lvPromoted)
+ if (varDsc->lvDoNotEnregister || !varDsc->lvPromoted)
{
- *use = fgMorphLclToFieldList((*use)->AsLclVar());
+ return false;
}
+
+ *use = fgMorphLclToFieldList((*use)->AsLclVar());
+ return true;
}
//------------------------------------------------------------------------
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 751b11fc8898ee..af5e1b8d0f31c3 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -18,6 +18,175 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gentree.h"
#include "jitstd/algorithm.h"
+//------------------------------------------------------------------------
+// ObjectAllocator: construct the object allocator object
+//
+// Arguments:
+// comp - compiler instance
+//
+// Notes:
+// Runs only if Compiler::optMethodFlags has flag OMF_HAS_NEWOBJ set.
+//
+// Builds a connection graph where nodes mostly represent local vars,
+// showing how locals can assign values to one another.
+//
+// The graph also includes a few abstract node types: a node representing
+// an unknown source of values, and (pseudo local) nodes representing
+// assignments that only happen under particular conditions.
+//
+ObjectAllocator::ObjectAllocator(Compiler* comp)
+ : Phase(comp, PHASE_ALLOCATE_OBJECTS)
+ , m_IsObjectStackAllocationEnabled(false)
+ , m_AnalysisDone(false)
+ , m_bvCount(0)
+ , m_bitVecTraits(BitVecTraits(comp->lvaCount, comp))
+ , m_HeapLocalToStackLocalMap(comp->getAllocator(CMK_ObjectAllocator))
+ , m_EnumeratorLocalToPseudoLocalMap(comp->getAllocator(CMK_ObjectAllocator))
+ , m_CloneMap(comp->getAllocator(CMK_ObjectAllocator))
+ , m_nextLocalIndex(0)
+ , m_firstPseudoLocalNum(BAD_VAR_NUM)
+ , m_firstPseudoLocalIndex(BAD_VAR_NUM)
+ , m_numPseudoLocals(0)
+ , m_maxPseudoLocals(0)
+ , m_regionsToClone(0)
+{
+ m_EscapingPointers = BitVecOps::UninitVal();
+ m_PossiblyStackPointingPointers = BitVecOps::UninitVal();
+ m_DefinitelyStackPointingPointers = BitVecOps::UninitVal();
+ m_ConnGraphAdjacencyMatrix = nullptr;
+ m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize();
+}
+
+//------------------------------------------------------------------------
+// IsTrackedType: see if this type is being tracked by escape analysis
+//
+// Arguments:
+// type - type of interest
+//
+// Returns:
+// true if so
+//
+bool ObjectAllocator::IsTrackedType(var_types type)
+{
+ const bool isTrackableScalar = (type == TYP_REF) || (genActualType(type) == TYP_I_IMPL) || (type == TYP_BYREF);
+ return isTrackableScalar;
+}
+
+//------------------------------------------------------------------------
+// IsTrackedLocal: see if this local is being tracked by escape analysis
+//
+// Arguments:
+// lclNum - local of interest
+//
+// Returns:
+// true if so
+//
+bool ObjectAllocator::IsTrackedLocal(unsigned lclNum)
+{
+ assert(lclNum < comp->lvaCount);
+ LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum);
+ return varDsc->lvTracked;
+}
+
+//------------------------------------------------------------------------
+// HasIndex: see if a given local has a tracking index
+//
+// Arguments:
+// lclNum -- local to query
+//
+// Returns:
+// true if so
+//
+bool ObjectAllocator::HasIndex(unsigned lclNum)
+{
+ if (lclNum < comp->lvaCount)
+ {
+ LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum);
+ return varDsc->lvTracked;
+ }
+
+ if ((lclNum >= m_firstPseudoLocalNum) && (lclNum < m_bvCount))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+//------------------------------------------------------------------------
+// LocalToIndex: get the bit vector index for a local or pseudo-local
+//
+// Arguments:
+// lclNum -- local var num or pseudo local var num
+//
+// Returns:
+// bvIndex to use
+//
+unsigned ObjectAllocator::LocalToIndex(unsigned lclNum)
+{
+ unsigned result = BAD_VAR_NUM;
+
+ if (lclNum < comp->lvaCount)
+ {
+ assert(IsTrackedLocal(lclNum));
+ LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum);
+ result = varDsc->lvVarIndex;
+ }
+ else
+ {
+ result = m_firstPseudoLocalIndex + (lclNum - m_firstPseudoLocalNum);
+ }
+
+ assert(result < m_bvCount);
+
+ return result;
+}
+
+//------------------------------------------------------------------------
+// IndexToLocal: get the local num for a bv index
+//
+// Arguments:
+// bvIndex -- bit vector index
+//
+// Returns:
+// local num
+//
+unsigned ObjectAllocator::IndexToLocal(unsigned bvIndex)
+{
+ assert(bvIndex < m_bvCount);
+ unsigned result = BAD_VAR_NUM;
+
+ if (bvIndex < m_firstPseudoLocalIndex)
+ {
+ result = comp->lvaTrackedToVarNum[bvIndex];
+ assert(IsTrackedLocal(result));
+ }
+ else
+ {
+ result = m_firstPseudoLocalNum + (bvIndex - m_firstPseudoLocalIndex);
+ }
+
+ return result;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------------
+// DumpIndex: write a description of a given bv index
+//
+// Arguments:
+// bvIndex - index to describe
+//
+// Notes:
+// includes leading space
+//
+void ObjectAllocator::DumpIndex(unsigned bvIndex)
+{
+ const unsigned lclNum = IndexToLocal(bvIndex);
+ const bool isLocalVar = (lclNum < m_firstPseudoLocalNum);
+ printf(" %c%02u", isLocalVar ? 'V' : 'P', lclNum);
+}
+#endif
+
//------------------------------------------------------------------------
// DoPhase: Run analysis (if object stack allocation is enabled) and then
// morph each GT_ALLOCOBJ node either into an allocation helper
@@ -101,7 +270,8 @@ PhaseStatus ObjectAllocator::DoPhase()
void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum)
{
- BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, lclNum);
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, bvIndex);
}
//------------------------------------------------------------------------------
@@ -114,7 +284,8 @@ void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum)
void ObjectAllocator::MarkLclVarAsPossiblyStackPointing(unsigned int lclNum)
{
- BitVecOps::AddElemD(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum);
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ BitVecOps::AddElemD(&m_bitVecTraits, m_PossiblyStackPointingPointers, bvIndex);
}
//------------------------------------------------------------------------------
@@ -127,7 +298,9 @@ void ObjectAllocator::MarkLclVarAsPossiblyStackPointing(unsigned int lclNum)
void ObjectAllocator::MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum)
{
- BitVecOps::AddElemD(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum);
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ JITDUMP("Marking V%02u (0x%02x) as definitely stack-pointing\n", lclNum, bvIndex);
+ BitVecOps::AddElemD(&m_bitVecTraits, m_DefinitelyStackPointingPointers, bvIndex);
}
//------------------------------------------------------------------------------
@@ -140,23 +313,188 @@ void ObjectAllocator::MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum)
void ObjectAllocator::AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum)
{
- BitVecOps::AddElemD(&m_bitVecTraits, m_ConnGraphAdjacencyMatrix[sourceLclNum], targetLclNum);
+ const unsigned sourceBvIndex = LocalToIndex(sourceLclNum);
+ const unsigned targetBvIndex = LocalToIndex(targetLclNum);
+ BitVecOps::AddElemD(&m_bitVecTraits, m_ConnGraphAdjacencyMatrix[sourceBvIndex], targetBvIndex);
+}
+
+//------------------------------------------------------------------------
+// PrepareAnalysis: determine how to model the escape analysis problem
+// with bit vectors.
+//
+void ObjectAllocator::PrepareAnalysis()
+{
+ // Determine how locals map to indices in the bit vectors / connection graph.
+ //
+ // In "lcl num" space
+ //
+ // We reserve the range [0...L-1] for the initial set of locals.
+ // Here L is the initial lvaCount.
+ //
+ // If conditional escape analysis is enabled, we reserve the range [L...L+M-1]
+ // for locals allocated during the conditional escape analysis expansions,
+ // where M is the maximum number of pseudo-vars.
+ //
+ // We reserve the range [L+M ... L+2M-1] for pseudo locals themselves.
+ //
+ // In "bv" space
+ //
+ // We reserve the range [0...N-1] for the initial set of tracked locals.
+ // Here N <= L is the number of tracked locals, determined below, an each
+ // tracked local has an index assigned in this range.
+ //
+ // If conditional escape analysis is enabled, we reserve the range [N...N+M-1]
+ // for locals allocated during the conditional escape analysis expansions,
+ // where M is the maximum number of pseudo-vars.
+ //
+ // We reserve the range [N+M ... N+2M-1] for pseudo locals themselves.
+ //
+ // LocalToIndex translates from "lcl num" space to "bv" space
+ // IndexToLocal translates from "bv" space to "lcl num" space
+ //
+ const unsigned localCount = comp->lvaCount;
+ unsigned bvNext = 0;
+
+ // Enumerate which locals are going to appear in our connection
+ // graph, and assign them BV indices.
+ //
+ for (unsigned lclNum = 0; lclNum < localCount; lclNum++)
+ {
+ LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum);
+
+ if (IsTrackedType(varDsc->TypeGet()))
+ {
+ varDsc->lvTracked = 1;
+ varDsc->lvVarIndex = (unsigned short)bvNext;
+ bvNext++;
+ }
+ else
+ {
+ varDsc->lvTracked = 0;
+ varDsc->lvVarIndex = 0;
+ }
+ }
+
+ m_nextLocalIndex = bvNext;
+
+ // If we are going to do any conditional escape analysis, determine
+ // how much extra BV space we'll need.
+ //
+ bool const hasEnumeratorLocals = comp->hasImpEnumeratorGdvLocalMap();
+
+ if (hasEnumeratorLocals)
+ {
+ unsigned const enumeratorLocalCount = comp->getImpEnumeratorGdvLocalMap()->GetCount();
+ assert(enumeratorLocalCount > 0);
+
+ // For now, disable conditional escape analysis with OSR
+ // since the dominance picture is muddled at this point.
+ //
+ // The conditionally escaping allocation sites will likely be in loops anyways.
+ //
+ bool const enableConditionalEscape = JitConfig.JitObjectStackAllocationConditionalEscape() > 0;
+ bool const isOSR = comp->opts.IsOSR();
+
+ if (enableConditionalEscape && !isOSR)
+ {
+
+#ifdef DEBUG
+ static ConfigMethodRange JitObjectStackAllocationConditionalEscapeRange;
+ JitObjectStackAllocationConditionalEscapeRange.EnsureInit(
+ JitConfig.JitObjectStackAllocationConditionalEscapeRange());
+ const unsigned hash = comp->info.compMethodHash();
+ const bool inRange = JitObjectStackAllocationConditionalEscapeRange.Contains(hash);
+#else
+ const bool inRange = true;
+#endif
+
+ if (inRange)
+ {
+ JITDUMP("Enabling conditional escape analysis [%u pseudo-vars]\n", enumeratorLocalCount);
+ m_maxPseudoLocals = enumeratorLocalCount;
+ }
+ else
+ {
+ JITDUMP("Not enabling conditional escape analysis (disabled by range config)\n");
+ }
+ }
+ else
+ {
+ JITDUMP("Not enabling conditional escape analysis [%u pseudo-vars]: %s\n", enumeratorLocalCount,
+ enableConditionalEscape ? "OSR" : "disabled by config");
+ }
+ }
+
+ // When we clone to prevent conditional escape, we'll also create a new local
+ // var that we will track. So we need to leave room for these vars. There can
+ // be as many of these as there are pseudo locals.
+ //
+ m_firstPseudoLocalNum = localCount + m_maxPseudoLocals; // L + M, per above
+ m_firstPseudoLocalIndex = bvNext + m_maxPseudoLocals; // N + M, per above
+ bvNext += 2 * m_maxPseudoLocals;
+
+ // Now set up the BV traits.
+ //
+ m_bvCount = bvNext;
+ m_bitVecTraits = BitVecTraits(m_bvCount, comp);
+
+ // Create the reverse mapping from bvIndex to local var index
+ // (leave room for locals we may allocate)
+ //
+ if (comp->lvaTrackedToVarNumSize < m_firstPseudoLocalNum)
+ {
+ comp->lvaTrackedToVarNumSize = m_firstPseudoLocalNum;
+ comp->lvaTrackedToVarNum = new (comp->getAllocator(CMK_LvaTable)) unsigned[comp->lvaTrackedToVarNumSize];
+ }
+
+ for (unsigned lclNum = 0; lclNum < localCount; lclNum++)
+ {
+ LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum);
+
+ if (varDsc->lvTracked)
+ {
+ comp->lvaTrackedToVarNum[varDsc->lvVarIndex] = lclNum;
+ }
+ }
+
+ JITDUMP("%u locals, %u tracked by escape analysis\n", localCount, m_nextLocalIndex);
+
+ if (m_nextLocalIndex > 0)
+ {
+ JITDUMP("\nLocal var range [%02u...%02u]\n", 0, localCount);
+ if (m_maxPseudoLocals > 0)
+ {
+ JITDUMP("Enumerator var range [%02u...%02u]\n", localCount, localCount + m_maxPseudoLocals - 1);
+ JITDUMP("Pseudo var range [%02u...%02u]\n", m_firstPseudoLocalNum,
+ m_firstPseudoLocalNum + m_maxPseudoLocals - 1);
+ }
+
+ JITDUMP("\nLocal var bv range [%02u...%02u]\n", 0, m_nextLocalIndex - 1);
+ if (m_maxPseudoLocals > 0)
+ {
+ JITDUMP("Enumerator var bv range [%02u...%02u]\n", m_nextLocalIndex,
+ m_nextLocalIndex + m_maxPseudoLocals - 1);
+ JITDUMP("Pseudo var bv range [%02u...%02u]\n", m_nextLocalIndex + m_maxPseudoLocals,
+ m_nextLocalIndex + 2 * m_maxPseudoLocals - 1);
+ }
+ }
}
//------------------------------------------------------------------------
// DoAnalysis: Walk over basic blocks of the method and detect all local
// variables that can be allocated on the stack.
-
+//
void ObjectAllocator::DoAnalysis()
{
assert(m_IsObjectStackAllocationEnabled);
assert(!m_AnalysisDone);
- if (comp->lvaCount > 0)
+ PrepareAnalysis();
+
+ if (m_bvCount > 0)
{
- m_EscapingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits);
- m_ConnGraphAdjacencyMatrix =
- new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[comp->lvaCount + m_maxPseudoLocals + 1];
+ m_EscapingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits);
+ m_ConnGraphAdjacencyMatrix = new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[m_bvCount];
// If we are doing conditional escape analysis, we also need to compute dominance.
//
@@ -263,28 +601,24 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph()
for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum)
{
- var_types type = comp->lvaTable[lclNum].TypeGet();
-
- if (type == TYP_REF || genActualType(type) == TYP_I_IMPL || type == TYP_BYREF)
+ if (!IsTrackedLocal(lclNum))
{
- m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::MakeEmpty(&m_bitVecTraits);
-
- if (comp->lvaTable[lclNum].IsAddressExposed())
- {
- JITDUMP(" V%02u is address exposed\n", lclNum);
- MarkLclVarAsEscaping(lclNum);
- }
+ continue;
}
- else
+
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ m_ConnGraphAdjacencyMatrix[bvIndex] = BitVecOps::MakeEmpty(&m_bitVecTraits);
+
+ if (comp->lvaTable[lclNum].IsAddressExposed())
{
- // Variable that may not point to objects will not participate in our analysis.
- m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::UninitVal();
+ JITDUMP(" V%02u is address exposed\n", lclNum);
+ MarkLclVarAsEscaping(lclNum);
}
}
for (unsigned int p = 0; p < m_maxPseudoLocals; p++)
{
- m_ConnGraphAdjacencyMatrix[p + comp->lvaCount] = BitVecOps::MakeEmpty(&m_bitVecTraits);
+ m_ConnGraphAdjacencyMatrix[p + m_firstPseudoLocalIndex] = BitVecOps::MakeEmpty(&m_bitVecTraits);
}
// We should have computed the DFS tree already.
@@ -321,29 +655,29 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e
JITDUMP("\nComputing escape closure\n\n");
bool doOneMoreIteration = true;
BitSetShortLongRep newEscapingNodes = BitVecOps::UninitVal();
- unsigned int lclNum;
+ unsigned int lclIndex;
while (doOneMoreIteration)
{
BitVecOps::Iter iterator(bitVecTraits, escapingNodesToProcess);
doOneMoreIteration = false;
- while (iterator.NextElem(&lclNum))
+ while (iterator.NextElem(&lclIndex))
{
- if (m_ConnGraphAdjacencyMatrix[lclNum] != nullptr)
+ if (m_ConnGraphAdjacencyMatrix[lclIndex] != nullptr)
{
doOneMoreIteration = true;
// newEscapingNodes = adjacentNodes[lclNum]
- BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]);
+ BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclIndex]);
// newEscapingNodes = newEscapingNodes \ escapingNodes
BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes);
// escapingNodesToProcess = escapingNodesToProcess U newEscapingNodes
BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes);
// escapingNodes = escapingNodes U newEscapingNodes
BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes);
- // escapingNodesToProcess = escapingNodesToProcess \ { lclNum }
- BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum);
+ // escapingNodesToProcess = escapingNodesToProcess \ { lclIndex }
+ BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclIndex);
#ifdef DEBUG
// Print the first witness to new escapes.
@@ -351,12 +685,13 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e
if (!BitVecOps::IsEmpty(bitVecTraits, newEscapingNodes))
{
BitVecOps::Iter iterator(bitVecTraits, newEscapingNodes);
- unsigned int newLclNum;
- while (iterator.NextElem(&newLclNum))
+ unsigned int newLclIndex;
+ while (iterator.NextElem(&newLclIndex))
{
- // Note P's never are sources of assignments...
- JITDUMP("%c%02u causes V%02u to escape\n", lclNum >= comp->lvaCount ? 'P' : 'V', lclNum,
- newLclNum);
+ JITDUMPEXEC(DumpIndex(lclIndex));
+ JITDUMP(" causes ");
+ JITDUMPEXEC(DumpIndex(newLclIndex));
+ JITDUMP(" to escape\n");
}
}
#endif
@@ -395,70 +730,203 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits)
changed = false;
for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum)
{
- LclVarDsc* lclVarDsc = comp->lvaGetDesc(lclNum);
- var_types type = lclVarDsc->TypeGet();
+ if (!IsTrackedLocal(lclNum))
+ {
+ continue;
+ }
+
+ const unsigned lclIndex = LocalToIndex(lclNum);
- if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF)
+ if (!MayLclVarPointToStack(lclNum) &&
+ !BitVecOps::IsEmptyIntersection(bitVecTraits, m_PossiblyStackPointingPointers,
+ m_ConnGraphAdjacencyMatrix[lclIndex]))
{
- if (!MayLclVarPointToStack(lclNum) &&
- !BitVecOps::IsEmptyIntersection(bitVecTraits, m_PossiblyStackPointingPointers,
- m_ConnGraphAdjacencyMatrix[lclNum]))
- {
- // We discovered a new pointer that may point to the stack.
- MarkLclVarAsPossiblyStackPointing(lclNum);
+ // We discovered a new pointer that may point to the stack.
+ MarkLclVarAsPossiblyStackPointing(lclNum);
- // Check if this pointer always points to the stack.
- // For OSR the reference may be pointing at the heap-allocated Tier0 version.
- //
- if ((lclVarDsc->lvSingleDef == 1) && !comp->opts.IsOSR())
+ // Check if this pointer always points to the stack.
+ // For OSR the reference may be pointing at the heap-allocated Tier0 version.
+ //
+ LclVarDsc* lclVarDsc = comp->lvaGetDesc(lclNum);
+
+ if ((lclVarDsc->lvSingleDef == 1) && !comp->opts.IsOSR())
+ {
+ // Check if we know what is assigned to this pointer.
+ unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclIndex]);
+ assert(bitCount <= 1);
+ if (bitCount == 1)
{
- // Check if we know what is assigned to this pointer.
- unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
- assert(bitCount <= 1);
- if (bitCount == 1)
+ BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclIndex]);
+ unsigned rhsLclIndex = 0;
+ iter.NextElem(&rhsLclIndex);
+ unsigned rhsLclNum = IndexToLocal(rhsLclIndex);
+ if (DoesLclVarPointToStack(rhsLclNum))
{
- BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
- unsigned rhsLclNum = 0;
- iter.NextElem(&rhsLclNum);
-
- if (DoesLclVarPointToStack(rhsLclNum))
- {
- // The only store to lclNum local is the definitely-stack-pointing
- // rhsLclNum local so lclNum local is also definitely-stack-pointing.
- MarkLclVarAsDefinitelyStackPointing(lclNum);
- }
+ // The only store to lclNum local is the definitely-stack-pointing
+ // rhsLclNum local so lclNum local is also definitely-stack-pointing.
+ MarkLclVarAsDefinitelyStackPointing(lclNum);
}
}
- changed = true;
}
+ changed = true;
}
}
}
- JITDUMP("Definitely stack-pointing locals:");
+#ifdef DEBUG
+ if (comp->verbose)
{
- BitVecOps::Iter iter(bitVecTraits, m_DefinitelyStackPointingPointers);
- unsigned lclNum = 0;
- while (iter.NextElem(&lclNum))
+ printf("Definitely stack-pointing locals:");
+ {
+ BitVecOps::Iter iter(bitVecTraits, m_DefinitelyStackPointingPointers);
+ unsigned lclIndex = 0;
+ while (iter.NextElem(&lclIndex))
+ {
+ DumpIndex(lclIndex);
+ }
+ printf("\n");
+ }
+
+ printf("Possibly stack-pointing locals:");
{
- JITDUMP(" V%02u", lclNum);
+ BitVecOps::Iter iter(bitVecTraits, m_PossiblyStackPointingPointers);
+ unsigned lclIndex = 0;
+ while (iter.NextElem(&lclIndex))
+ {
+ if (!BitVecOps::IsMember(bitVecTraits, m_DefinitelyStackPointingPointers, lclIndex))
+ {
+ DumpIndex(lclIndex);
+ }
+ }
+ printf("\n");
}
- JITDUMP("\n");
}
+#endif
+}
+
+//------------------------------------------------------------------------
+// CanAllocateLclVarOnStack: Returns true iff local variable can be
+// allocated on the stack.
+//
+// Arguments:
+// lclNum - Local variable number
+// clsHnd - Class/struct handle of the variable class
+// allocType - Type of allocation (newobj or newarr)
+// length - Length of the array (for newarr)
+// blockSize - [out, optional] exact size of the object
+// reason - [out, required] if result is false, reason why
+// preliminaryCheck - if true, allow checking before analysis is done
+// (for things that inherently disqualify the local)
+//
+// Return Value:
+// Returns true iff local variable can be allocated on the stack.
+//
+bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum,
+ CORINFO_CLASS_HANDLE clsHnd,
+ ObjectAllocationType allocType,
+ ssize_t length,
+ unsigned int* blockSize,
+ const char** reason,
+ bool preliminaryCheck)
+{
+ assert(preliminaryCheck || m_AnalysisDone);
+
+ bool enableBoxedValueClasses = true;
+ bool enableRefClasses = true;
+ bool enableArrays = true;
+ *reason = "[ok]";
- JITDUMP("Possibly stack-pointing locals:");
+#ifdef DEBUG
+ enableBoxedValueClasses = (JitConfig.JitObjectStackAllocationBoxedValueClass() != 0);
+ enableRefClasses = (JitConfig.JitObjectStackAllocationRefClass() != 0);
+ enableArrays = (JitConfig.JitObjectStackAllocationArray() != 0);
+#endif
+
+ unsigned classSize = 0;
+
+ if (allocType == OAT_NEWARR)
{
- BitVecOps::Iter iter(bitVecTraits, m_PossiblyStackPointingPointers);
- unsigned lclNum = 0;
- while (iter.NextElem(&lclNum))
+ if (!enableArrays)
+ {
+ *reason = "[disabled by config]";
+ return false;
+ }
+
+ if ((length < 0) || (length > CORINFO_Array_MaxLength))
+ {
+ *reason = "[invalid array length]";
+ return false;
+ }
+
+ ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length);
+ classSize = layout->GetSize();
+ }
+ else if (allocType == OAT_NEWOBJ)
+ {
+ if (comp->info.compCompHnd->isValueClass(clsHnd))
+ {
+ if (!enableBoxedValueClasses)
+ {
+ *reason = "[disabled by config]";
+ return false;
+ }
+
+ if (comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd) == NO_CLASS_HANDLE)
+ {
+ *reason = "[no boxed type available]";
+ return false;
+ }
+
+ classSize = comp->info.compCompHnd->getClassSize(clsHnd);
+ }
+ else
{
- if (!BitVecOps::IsMember(bitVecTraits, m_DefinitelyStackPointingPointers, lclNum))
+ if (!enableRefClasses)
{
- JITDUMP(" V%02u", lclNum);
+ *reason = "[disabled by config]";
+ return false;
}
+
+ if (!comp->info.compCompHnd->canAllocateOnStack(clsHnd))
+ {
+ *reason = "[runtime disallows]";
+ return false;
+ }
+
+ classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd);
}
- JITDUMP("\n");
}
+ else
+ {
+ assert(!"Unexpected allocation type");
+ return false;
+ }
+
+ if (classSize > m_StackAllocMaxSize)
+ {
+ *reason = "[too large]";
+ return false;
+ }
+
+ if (preliminaryCheck)
+ {
+ return true;
+ }
+
+ const bool escapes = CanLclVarEscape(lclNum);
+
+ if (escapes)
+ {
+ *reason = "[escapes]";
+ return false;
+ }
+
+ if (blockSize != nullptr)
+ {
+ *blockSize = classSize;
+ }
+
+ return true;
}
//------------------------------------------------------------------------
@@ -1351,8 +1819,7 @@ void ObjectAllocator::RewriteUses()
unsigned int newLclNum = BAD_VAR_NUM;
LclVarDsc* lclVarDsc = m_compiler->lvaGetDesc(lclNum);
- if ((lclNum < BitVecTraits::GetSize(&m_allocator->m_bitVecTraits)) &&
- m_allocator->MayLclVarPointToStack(lclNum))
+ if (m_allocator->MayLclVarPointToStack(lclNum))
{
// Analysis does not handle indirect access to pointer locals.
assert(tree->OperIsScalarLocal());
@@ -1539,7 +2006,7 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait
for (unsigned p = 0; p < m_numPseudoLocals; p++)
{
- unsigned const pseudoLocal = p + comp->lvaCount;
+ unsigned const pseudoLocal = p + m_firstPseudoLocalNum;
bool canClone = true;
CloneInfo* info = nullptr;
@@ -1553,8 +2020,9 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait
break;
}
- unsigned lclNum = BAD_VAR_NUM;
- BitVec pseudoLocalAdjacencies = m_ConnGraphAdjacencyMatrix[pseudoLocal];
+ // See what locals were "assigned" to the pseudo local.
+ //
+ BitVec pseudoLocalAdjacencies = m_ConnGraphAdjacencyMatrix[LocalToIndex(pseudoLocal)];
// If we found an allocation but didn't find any conditionally escaping uses, then cloning is of no use
//
@@ -1568,14 +2036,15 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait
// Check if each conditionally escaping local escapes on its own; if so cloning is of no use
//
BitVecOps::Iter iterator(bitVecTraits, pseudoLocalAdjacencies);
- while (canClone && iterator.NextElem(&lclNum))
+ unsigned lclNumIndex = BAD_VAR_NUM;
+ while (canClone && iterator.NextElem(&lclNumIndex))
{
- if (BitVecOps::IsMember(bitVecTraits, escapingNodes, lclNum))
+ if (BitVecOps::IsMember(bitVecTraits, escapingNodes, lclNumIndex))
{
// The enumerator var or a related var had escaping uses somewhere in the method,
// not under a failing GDV or any GDV.
//
- JITDUMP(" V%02u escapes independently of P%02u\n", lclNum, pseudoLocal);
+ JITDUMP(" V%02u escapes independently of P%02u\n", IndexToLocal(lclNumIndex), pseudoLocal);
canClone = false;
break;
}
@@ -1583,11 +2052,11 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait
// Also check the alloc temps
//
- if (info->m_allocTemps != nullptr)
+ if (canClone && (info->m_allocTemps != nullptr))
{
for (unsigned v : *(info->m_allocTemps))
{
- if (BitVecOps::IsMember(bitVecTraits, escapingNodes, v))
+ if (BitVecOps::IsMember(bitVecTraits, escapingNodes, LocalToIndex(v)))
{
JITDUMP(" alloc temp V%02u escapes independently of P%02u\n", v, pseudoLocal)
canClone = false;
@@ -1601,7 +2070,7 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait
// We may be able to clone and specialize the enumerator uses to ensure
// that the allocated enumerator does not escape.
//
- JITDUMP(" P%02u is guarding the escape of V%02u\n", pseudoLocal, lclNum);
+ JITDUMP(" P%02u is guarding the escape of V%02u\n", pseudoLocal, info->m_local);
if (info->m_allocTemps != nullptr)
{
JITDUMP(" along with ");
@@ -1650,7 +2119,7 @@ bool ObjectAllocator::AnalyzeIfCloningCanPreventEscape(BitVecTraits* bitVecTrait
{
JITDUMP(" not optimizing, so will mark P%02u as escaping\n", pseudoLocal);
MarkLclVarAsEscaping(pseudoLocal);
- BitVecOps::AddElemD(bitVecTraits, escapingNodesToProcess, pseudoLocal);
+ BitVecOps::AddElemD(bitVecTraits, escapingNodesToProcess, LocalToIndex(pseudoLocal));
newEscapes = true;
}
}
@@ -1669,7 +2138,7 @@ unsigned ObjectAllocator::NewPseudoLocal()
unsigned result = BAD_VAR_NUM;
if (m_numPseudoLocals < m_maxPseudoLocals)
{
- result = comp->lvaCount + m_numPseudoLocals;
+ result = m_firstPseudoLocalNum + m_numPseudoLocals;
m_numPseudoLocals++;
}
return result;
@@ -2937,10 +3406,22 @@ void ObjectAllocator::CloneAndSpecialize(CloneInfo* info)
// Type for now as TYP_REF; this will get rewritten later during RewriteUses
//
- comp->lvaTable[newEnumeratorLocal].lvType = TYP_REF;
- comp->lvaTable[newEnumeratorLocal].lvSingleDef = 1;
+ LclVarDsc* const newEnumeratorDsc = comp->lvaGetDesc(newEnumeratorLocal);
+
+ newEnumeratorDsc->lvType = TYP_REF;
+ newEnumeratorDsc->lvSingleDef = 1;
comp->lvaSetClass(newEnumeratorLocal, info->m_type, /* isExact */ true);
+ newEnumeratorDsc->lvTracked = 1;
+ newEnumeratorDsc->lvVarIndex = (unsigned short)m_nextLocalIndex; // grr
+ assert(newEnumeratorDsc->lvVarIndex < comp->lvaTrackedToVarNumSize);
+ comp->lvaTrackedToVarNum[newEnumeratorDsc->lvVarIndex] = newEnumeratorLocal;
+ m_nextLocalIndex++;
+ assert(m_maxPseudoLocals > 0);
+ assert(newEnumeratorDsc->lvVarIndex < m_firstPseudoLocalIndex);
+
+ JITDUMP("Tracking V%02u via 0x%02x\n", newEnumeratorLocal, newEnumeratorDsc->lvVarIndex);
+
class ReplaceVisitor final : public GenTreeVisitor
{
CloneInfo* m_info;
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 99dd8965924669..d7a59bb33a85d7 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -125,6 +125,7 @@ class ObjectAllocator final : public Phase
// Data members
bool m_IsObjectStackAllocationEnabled;
bool m_AnalysisDone;
+ unsigned m_bvCount;
BitVecTraits m_bitVecTraits;
BitVec m_EscapingPointers;
// We keep the set of possibly-stack-pointing pointers as a superset of the set of
@@ -138,8 +139,11 @@ class ObjectAllocator final : public Phase
// Info for conditionally-escaping locals
LocalToLocalMap m_EnumeratorLocalToPseudoLocalMap;
CloneMap m_CloneMap;
- unsigned m_maxPseudoLocals;
+ unsigned m_nextLocalIndex;
+ unsigned m_firstPseudoLocalNum;
+ unsigned m_firstPseudoLocalIndex;
unsigned m_numPseudoLocals;
+ unsigned m_maxPseudoLocals;
unsigned m_regionsToClone;
//===============================================================================
@@ -160,11 +164,17 @@ class ObjectAllocator final : public Phase
virtual PhaseStatus DoPhase() override;
private:
+ bool IsTrackedType(var_types type);
+ bool IsTrackedLocal(unsigned lclNum);
+ bool HasIndex(unsigned lclNum);
+ unsigned LocalToIndex(unsigned lclNum);
+ unsigned IndexToLocal(unsigned bvIndex);
bool CanLclVarEscape(unsigned int lclNum);
void MarkLclVarAsPossiblyStackPointing(unsigned int lclNum);
void MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum);
bool MayLclVarPointToStack(unsigned int lclNum);
bool DoesLclVarPointToStack(unsigned int lclNum);
+ void PrepareAnalysis();
void DoAnalysis();
void MarkLclVarAsEscaping(unsigned int lclNum);
void MarkEscapingVarsAndBuildConnGraph();
@@ -212,79 +222,11 @@ class ObjectAllocator final : public Phase
void CloneAndSpecialize();
static const unsigned int s_StackAllocMaxSize = 0x2000U;
-};
-
-//===============================================================================
-
-inline ObjectAllocator::ObjectAllocator(Compiler* comp)
- : Phase(comp, PHASE_ALLOCATE_OBJECTS)
- , m_IsObjectStackAllocationEnabled(false)
- , m_AnalysisDone(false)
- , m_bitVecTraits(BitVecTraits(comp->lvaCount, comp))
- , m_HeapLocalToStackLocalMap(comp->getAllocator(CMK_ObjectAllocator))
- , m_EnumeratorLocalToPseudoLocalMap(comp->getAllocator(CMK_ObjectAllocator))
- , m_CloneMap(comp->getAllocator(CMK_ObjectAllocator))
- , m_maxPseudoLocals(0)
- , m_numPseudoLocals(0)
- , m_regionsToClone(0)
-
-{
- // If we are going to do any conditional escape analysis, allocate
- // extra BV space for the "pseudo" locals we'll need.
- //
- // For now, disable conditional escape analysis with OSR
- // since the dominance picture is muddled at this point.
- //
- // The conditionally escaping allocation sites will likely be in loops anyways.
- //
- bool const hasEnumeratorLocals = comp->hasImpEnumeratorGdvLocalMap();
-
- if (hasEnumeratorLocals)
- {
- unsigned const enumeratorLocalCount = comp->getImpEnumeratorGdvLocalMap()->GetCount();
- assert(enumeratorLocalCount > 0);
-
- bool const enableConditionalEscape = JitConfig.JitObjectStackAllocationConditionalEscape() > 0;
- bool const isOSR = comp->opts.IsOSR();
-
- if (enableConditionalEscape && !isOSR)
- {
#ifdef DEBUG
- static ConfigMethodRange JitObjectStackAllocationConditionalEscapeRange;
- JitObjectStackAllocationConditionalEscapeRange.EnsureInit(
- JitConfig.JitObjectStackAllocationConditionalEscapeRange());
- const unsigned hash = comp->info.compMethodHash();
- const bool inRange = JitObjectStackAllocationConditionalEscapeRange.Contains(hash);
-#else
- const bool inRange = true;
+ void DumpIndex(unsigned bvIndex);
#endif
-
- if (inRange)
- {
- m_maxPseudoLocals = enumeratorLocalCount;
- m_bitVecTraits = BitVecTraits(comp->lvaCount + enumeratorLocalCount + 1, comp);
- JITDUMP("Enabling conditional escape analysis [%u pseudo-vars]\n", enumeratorLocalCount);
- }
- else
- {
- JITDUMP("Not enabling conditional escape analysis (disabled by range config)\n");
- }
- }
- else
- {
- JITDUMP("Not enabling conditional escape analysis [%u pseudo-vars]: %s\n", enumeratorLocalCount,
- enableConditionalEscape ? "OSR" : "disabled by config");
- }
- }
-
- m_EscapingPointers = BitVecOps::UninitVal();
- m_PossiblyStackPointingPointers = BitVecOps::UninitVal();
- m_DefinitelyStackPointingPointers = BitVecOps::UninitVal();
- m_ConnGraphAdjacencyMatrix = nullptr;
-
- m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize();
-}
+};
//------------------------------------------------------------------------
// IsObjectStackAllocationEnabled: Returns true iff object stack allocation is enabled
@@ -305,131 +247,6 @@ inline void ObjectAllocator::EnableObjectStackAllocation()
m_IsObjectStackAllocationEnabled = true;
}
-//------------------------------------------------------------------------
-// CanAllocateLclVarOnStack: Returns true iff local variable can be
-// allocated on the stack.
-//
-// Arguments:
-// lclNum - Local variable number
-// clsHnd - Class/struct handle of the variable class
-// allocType - Type of allocation (newobj or newarr)
-// length - Length of the array (for newarr)
-// blockSize - [out, optional] exact size of the object
-// reason - [out, required] if result is false, reason why
-// preliminaryCheck - if true, allow checking before analysis is done
-// (for things that inherently disqualify the local)
-//
-// Return Value:
-// Returns true iff local variable can be allocated on the stack.
-//
-inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum,
- CORINFO_CLASS_HANDLE clsHnd,
- ObjectAllocationType allocType,
- ssize_t length,
- unsigned int* blockSize,
- const char** reason,
- bool preliminaryCheck)
-{
- assert(preliminaryCheck || m_AnalysisDone);
-
- bool enableBoxedValueClasses = true;
- bool enableRefClasses = true;
- bool enableArrays = true;
- *reason = "[ok]";
-
-#ifdef DEBUG
- enableBoxedValueClasses = (JitConfig.JitObjectStackAllocationBoxedValueClass() != 0);
- enableRefClasses = (JitConfig.JitObjectStackAllocationRefClass() != 0);
- enableArrays = (JitConfig.JitObjectStackAllocationArray() != 0);
-#endif
-
- unsigned classSize = 0;
-
- if (allocType == OAT_NEWARR)
- {
- if (!enableArrays)
- {
- *reason = "[disabled by config]";
- return false;
- }
-
- if ((length < 0) || (length > CORINFO_Array_MaxLength))
- {
- *reason = "[invalid array length]";
- return false;
- }
-
- ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length);
- classSize = layout->GetSize();
- }
- else if (allocType == OAT_NEWOBJ)
- {
- if (comp->info.compCompHnd->isValueClass(clsHnd))
- {
- if (!enableBoxedValueClasses)
- {
- *reason = "[disabled by config]";
- return false;
- }
-
- if (comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd) == NO_CLASS_HANDLE)
- {
- *reason = "[no boxed type available]";
- return false;
- }
-
- classSize = comp->info.compCompHnd->getClassSize(clsHnd);
- }
- else
- {
- if (!enableRefClasses)
- {
- *reason = "[disabled by config]";
- return false;
- }
-
- if (!comp->info.compCompHnd->canAllocateOnStack(clsHnd))
- {
- *reason = "[runtime disallows]";
- return false;
- }
-
- classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd);
- }
- }
- else
- {
- assert(!"Unexpected allocation type");
- return false;
- }
-
- if (classSize > m_StackAllocMaxSize)
- {
- *reason = "[too large]";
- return false;
- }
-
- if (preliminaryCheck)
- {
- return true;
- }
-
- const bool escapes = CanLclVarEscape(lclNum);
-
- if (escapes)
- {
- *reason = "[escapes]";
- return false;
- }
-
- if (blockSize != nullptr)
- {
- *blockSize = classSize;
- }
-
- return true;
-}
-
//------------------------------------------------------------------------
// CanLclVarEscape: Returns true iff local variable can
// potentially escape from the method
@@ -442,7 +259,13 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu
inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum)
{
- return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum);
+ if (!HasIndex(lclNum))
+ {
+ return true;
+ }
+
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, bvIndex);
}
//------------------------------------------------------------------------
@@ -458,7 +281,14 @@ inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum)
inline bool ObjectAllocator::MayLclVarPointToStack(unsigned int lclNum)
{
assert(m_AnalysisDone);
- return BitVecOps::IsMember(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum);
+
+ if (!HasIndex(lclNum))
+ {
+ return false;
+ }
+
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ return BitVecOps::IsMember(&m_bitVecTraits, m_PossiblyStackPointingPointers, bvIndex);
}
//------------------------------------------------------------------------
@@ -475,7 +305,14 @@ inline bool ObjectAllocator::MayLclVarPointToStack(unsigned int lclNum)
inline bool ObjectAllocator::DoesLclVarPointToStack(unsigned int lclNum)
{
assert(m_AnalysisDone);
- return BitVecOps::IsMember(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum);
+
+ if (!HasIndex(lclNum))
+ {
+ return false;
+ }
+
+ const unsigned bvIndex = LocalToIndex(lclNum);
+ return BitVecOps::IsMember(&m_bitVecTraits, m_DefinitelyStackPointingPointers, bvIndex);
}
//===============================================================================
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 164f53dda6b2f4..10abc319b6184f 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -2368,30 +2368,38 @@ PhaseStatus Compiler::optInvertLoops()
}
//-----------------------------------------------------------------------------
-// optOptimizeFlow: simplify flow graph
+// optOptimizeFlow: Simplify flowgraph, and run a few flow optimizations
//
// Returns:
// suitable phase status
//
-// Notes:
-// Does not do profile-based reordering to try and ensure that
-// that we recognize and represent as many loops as possible.
-//
PhaseStatus Compiler::optOptimizeFlow()
{
noway_assert(opts.OptimizationEnabled());
- fgUpdateFlowGraph(/* doTailDuplication */ true);
- fgReorderBlocks(/* useProfile */ false);
+ bool modified = fgUpdateFlowGraph(/* doTailDuplication */ true);
- // fgReorderBlocks can cause IR changes even if it does not modify
- // the flow graph. It calls gtPrepareCost which can cause operand swapping.
- // Work around this for now.
- //
- // Note phase status only impacts dumping and checking done post-phase,
- // it has no impact on a release build.
- //
- return PhaseStatus::MODIFIED_EVERYTHING;
+ // Skipping fgExpandRarelyRunBlocks when we have PGO data incurs diffs if the profile is inconsistent,
+ // as it will propagate missing profile weights throughout the flowgraph.
+ // Running profile synthesis beforehand should get rid of these diffs.
+ // TODO: Always rely on profile synthesis to identify cold blocks.
+ modified |= fgExpandRarelyRunBlocks();
+
+ // Run branch optimizations for non-handler blocks.
+ assert(!fgFuncletsCreated);
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
+ {
+ if (block->hasHndIndex())
+ {
+ assert(bbIsHandlerBeg(block));
+ block = ehGetDsc(block->getHndIndex())->ebdHndLast;
+ continue;
+ }
+
+ modified |= fgOptimizeBranch(block);
+ }
+
+ return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}
//-----------------------------------------------------------------------------
@@ -2412,6 +2420,18 @@ PhaseStatus Compiler::optOptimizePreLayout()
modified |= fgExpandRarelyRunBlocks();
}
+ // Run a late pass of unconditional-to-conditional branch optimization, skipping handler blocks.
+ for (BasicBlock* block = fgFirstBB; block != fgFirstFuncletBB; block = block->Next())
+ {
+ if (!UsesFunclets() && block->hasHndIndex())
+ {
+ block = ehGetDsc(block->getHndIndex())->ebdHndLast;
+ continue;
+ }
+
+ modified |= fgOptimizeBranch(block);
+ }
+
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}
diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp
index bbd14401bc53eb..b1e3af0f917896 100644
--- a/src/coreclr/jit/promotion.cpp
+++ b/src/coreclr/jit/promotion.cpp
@@ -1307,6 +1307,13 @@ class LocalsUseVisitor : public GenTreeVisitor
rep.LclNum = m_compiler->lvaGrabTemp(false DEBUGARG(rep.Description));
LclVarDsc* dsc = m_compiler->lvaGetDesc(rep.LclNum);
dsc->lvType = rep.AccessType;
+
+ // Are we promoting Span<>._length field?
+ if ((rep.Offset == OFFSETOF__CORINFO_Span__length) && (rep.AccessType == TYP_INT) &&
+ m_compiler->lvaGetDesc(agg->LclNum)->IsSpan())
+ {
+ dsc->SetIsNeverNegative(true);
+ }
}
#ifdef DEBUG
diff --git a/src/coreclr/jit/rangecheck.cpp b/src/coreclr/jit/rangecheck.cpp
index da035fd952559e..7d7e20fde9daff 100644
--- a/src/coreclr/jit/rangecheck.cpp
+++ b/src/coreclr/jit/rangecheck.cpp
@@ -637,7 +637,7 @@ void RangeCheck::MergeEdgeAssertions(GenTreeLclVarCommon* lcl, ASSERT_VALARG_TP
//
bool RangeCheck::TryGetRangeFromAssertions(Compiler* comp, ValueNum num, ASSERT_VALARG_TP assertions, Range* pRange)
{
- MergeEdgeAssertions(comp, num, num, assertions, pRange);
+ MergeEdgeAssertions(comp, num, ValueNumStore::NoVN, assertions, pRange, false);
return !pRange->LowerLimit().IsUnknown() || !pRange->UpperLimit().IsUnknown();
}
@@ -645,14 +645,19 @@ bool RangeCheck::TryGetRangeFromAssertions(Compiler* comp, ValueNum num, ASSERT_
// MergeEdgeAssertions: Merge assertions on the edge flowing into the block about a variable
//
// Arguments:
-// comp - the compiler instance
-// normalLclVN - the value number to look for assertions for
-// preferredBoundVN - when this VN is set, it will be given preference over constant limits
-// assertions - the assertions to use
-// pRange - the range to tighten with assertions
+// comp - the compiler instance
+// normalLclVN - the value number to look for assertions for
+// preferredBoundVN - when this VN is set, it will be given preference over constant limits
+// assertions - the assertions to use
+// pRange - the range to tighten with assertions
+// canUseCheckedBounds - true if we can use checked bounds assertions (cache)
//
-void RangeCheck::MergeEdgeAssertions(
- Compiler* comp, ValueNum normalLclVN, ValueNum preferredBoundVN, ASSERT_VALARG_TP assertions, Range* pRange)
+void RangeCheck::MergeEdgeAssertions(Compiler* comp,
+ ValueNum normalLclVN,
+ ValueNum preferredBoundVN,
+ ASSERT_VALARG_TP assertions,
+ Range* pRange,
+ bool canUseCheckedBounds)
{
Range assertedRange = Range(Limit(Limit::keUnknown));
if (BitVecOps::IsEmpty(comp->apTraits, assertions))
@@ -680,7 +685,7 @@ void RangeCheck::MergeEdgeAssertions(
bool isUnsigned = false;
// Current assertion is of the form (i < len - cns) != 0
- if (curAssertion->IsCheckedBoundArithBound())
+ if (canUseCheckedBounds && curAssertion->IsCheckedBoundArithBound())
{
ValueNumStore::CompareCheckedBoundArithInfo info;
@@ -709,7 +714,7 @@ void RangeCheck::MergeEdgeAssertions(
cmpOper = (genTreeOps)info.cmpOper;
}
// Current assertion is of the form (i < len) != 0
- else if (curAssertion->IsCheckedBoundBound())
+ else if (canUseCheckedBounds && curAssertion->IsCheckedBoundBound())
{
ValueNumStore::CompareCheckedBoundArithInfo info;
@@ -767,7 +772,7 @@ void RangeCheck::MergeEdgeAssertions(
int cnstLimit = (int)curAssertion->op2.u1.iconVal;
assert(cnstLimit == comp->vnStore->CoercedConstantValue(curAssertion->op2.vn));
- if ((cnstLimit == 0) && (curAssertion->assertionKind == Compiler::OAK_NOT_EQUAL) &&
+ if ((cnstLimit == 0) && (curAssertion->assertionKind == Compiler::OAK_NOT_EQUAL) && canUseCheckedBounds &&
comp->vnStore->IsVNCheckedBound(curAssertion->op1.vn))
{
// we have arr.Len != 0, so the length must be atleast one
diff --git a/src/coreclr/jit/rangecheck.h b/src/coreclr/jit/rangecheck.h
index 884687ce35ff9c..d90257a990bc2e 100644
--- a/src/coreclr/jit/rangecheck.h
+++ b/src/coreclr/jit/rangecheck.h
@@ -728,8 +728,12 @@ class RangeCheck
void MergeEdgeAssertions(GenTreeLclVarCommon* lcl, ASSERT_VALARG_TP assertions, Range* pRange);
// Inspect the assertions about the current ValueNum to refine pRange
- static void MergeEdgeAssertions(
- Compiler* comp, ValueNum num, ValueNum preferredBoundVN, ASSERT_VALARG_TP assertions, Range* pRange);
+ static void MergeEdgeAssertions(Compiler* comp,
+ ValueNum num,
+ ValueNum preferredBoundVN,
+ ASSERT_VALARG_TP assertions,
+ Range* pRange,
+ bool canUseCheckedBounds = true);
// The maximum possible value of the given "limit". If such a value could not be determined
// return "false". For example: CORINFO_Array_MaxLength for array length.
diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp
index de23305c145b7f..9c7a8c6a112c28 100644
--- a/src/coreclr/jit/rationalize.cpp
+++ b/src/coreclr/jit/rationalize.cpp
@@ -341,11 +341,19 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackgtFlags & GTF_REVERSE_OPS) == 0); // gtNewSimdShuffleNode with reverse ops is not supported
GenTree* op1 = operands[0];
GenTree* op2 = operands[1];
- if (op2->IsCnsVec() && comp->IsValidForShuffle(op2->AsVecCon(), simdSize, simdBaseType))
+ bool isShuffleNative = intrinsicId != NI_Vector128_Shuffle;
+#if defined(TARGET_XARCH)
+ isShuffleNative =
+ isShuffleNative && (intrinsicId != NI_Vector256_Shuffle) && (intrinsicId != NI_Vector512_Shuffle);
+#elif defined(TARGET_ARM64)
+ isShuffleNative = isShuffleNative && (intrinsicId != NI_Vector64_Shuffle);
+#endif
+
+ // Check if the required intrinsics to emit are available.
+ if (!comp->IsValidForShuffle(op2, simdSize, simdBaseType, nullptr, isShuffleNative))
{
- result = comp->gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize);
+ break;
}
+
+ result = comp->gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isShuffleNative);
break;
}
@@ -790,6 +810,13 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge
}
break;
+ case GT_BSWAP16:
+ if (node->gtGetOp1()->OperIs(GT_CAST))
+ {
+ comp->fgSimpleLowerBswap16(BlockRange(), node);
+ }
+ break;
+
default:
// Check that we don't have nodes not allowed in HIR here.
assert((node->DebugOperKind() & DBK_NOTHIR) == 0);
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index cb4957a6b5f107..b8baab7d1825e9 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -239,6 +239,10 @@ typedef uint64_t regMaskSmall;
#define HAS_MORE_THAN_64_REGISTERS 1
#endif // TARGET_ARM64
+#define REG_LOW_BASE 0
+#ifdef HAS_MORE_THAN_64_REGISTERS
+#define REG_HIGH_BASE 64
+#endif
// TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit)
typedef regMaskSmall SingleTypeRegSet;
inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg);
diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp
index a0ca5a4c0081cc..9e6a65bc7d9a3f 100644
--- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp
+++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp
@@ -6,7 +6,6 @@
#include "rhassert.h"
#include "RedhawkWarnings.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp
index 051b9b0d8f7a88..9db38fc25eb855 100644
--- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp
+++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp
@@ -7,7 +7,6 @@
#include "gcinterface.dac.h"
#include "rhassert.h"
#include "TargetPtrs.h"
-#include "varint.h"
#include "PalRedhawkCommon.h"
#include "PalRedhawk.h"
#include "holder.h"
diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
index a3643e32f5eaf7..f7a91492de8c6f 100644
--- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
@@ -13,7 +13,6 @@
#include "GcEnum.h"
#include "shash.h"
#include "TypeManager.h"
-#include "varint.h"
#include "PalRedhawkCommon.h"
#include "PalRedhawk.h"
#include "holder.h"
diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp
index 91fd2db9260622..7ef489ddd33389 100644
--- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp
@@ -17,7 +17,6 @@
#include "PalRedhawkCommon.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "interoplibinterface.h"
diff --git a/src/coreclr/nativeaot/Runtime/GcStressControl.cpp b/src/coreclr/nativeaot/Runtime/GcStressControl.cpp
index 3d01748557580e..3f9853a3ea3b1e 100644
--- a/src/coreclr/nativeaot/Runtime/GcStressControl.cpp
+++ b/src/coreclr/nativeaot/Runtime/GcStressControl.cpp
@@ -15,7 +15,6 @@
#include "Crst.h"
#include "RhConfig.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "forward_declarations.h"
#include "StackFrameIterator.h"
diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
index cd5f37e5199928..17bb554fa307d5 100644
--- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
@@ -18,7 +18,6 @@
#include "rhbinder.h"
#include "RuntimeInstance.h"
#include "regdisplay.h"
-#include "varint.h"
#include "StackFrameIterator.h"
#include "thread.h"
#include "event.h"
diff --git a/src/coreclr/nativeaot/Runtime/NativePrimitiveDecoder.h b/src/coreclr/nativeaot/Runtime/NativePrimitiveDecoder.h
new file mode 100644
index 00000000000000..bbfd20b1ac4ad8
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/NativePrimitiveDecoder.h
@@ -0,0 +1,75 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// C/C++ clone of NativePrimitiveDecoder.cs subset
+
+#pragma once
+
+class NativePrimitiveDecoder
+{
+public:
+    // Reads a variable-length-encoded unsigned integer (1 to 5 bytes) and
+    // advances p past the encoding. The number of trailing one bits in the
+    // low nibble of the first byte selects the encoded length.
+    static uint32_t ReadUnsigned(uint8_t* & p)
+    {
+        uint32_t value = 0;
+
+        uint32_t val = *p;
+        if ((val & 1) == 0)
+        {
+            // 1-byte encoding: 7 bits of payload.
+            value = (val >> 1);
+            p += 1;
+        }
+        else if ((val & 2) == 0)
+        {
+            // 2-byte encoding: 14 bits of payload.
+            value = (val >> 2) |
+                    (((uint32_t)*(p + 1)) << 6);
+            p += 2;
+        }
+        else if ((val & 4) == 0)
+        {
+            // 3-byte encoding: 21 bits of payload.
+            value = (val >> 3) |
+                    (((uint32_t)*(p + 1)) << 5) |
+                    (((uint32_t)*(p + 2)) << 13);
+            p += 3;
+        }
+        else if ((val & 8) == 0)
+        {
+            // 4-byte encoding: 28 bits of payload.
+            value = (val >> 4) |
+                    (((uint32_t)*(p + 1)) << 4) |
+                    (((uint32_t)*(p + 2)) << 12) |
+                    (((uint32_t)*(p + 3)) << 20);
+            p += 4;
+        }
+        else
+        {
+            // 5-byte encoding: a full 32-bit payload follows the prefix byte.
+            // Assemble in uint32_t: shifting a byte >= 0x80 left by 24 as a
+            // promoted int would be undefined behavior before C++20.
+            value = (uint32_t)*(p+1) | ((uint32_t)*(p+2) << 8) | ((uint32_t)*(p+3) << 16) | ((uint32_t)*(p+4) << 24);
+            p += 5;
+        }
+
+        return value;
+    }
+
+    // Reads a little-endian 32-bit signed integer and advances p by 4.
+    static int32_t ReadInt32(uint8_t* & p)
+    {
+        // Assemble as unsigned first to avoid shifting into the sign bit.
+        return (int32_t)ReadUInt32(p);
+    }
+
+    // Reads a little-endian 32-bit unsigned integer and advances p by 4.
+    static uint32_t ReadUInt32(uint8_t* & p)
+    {
+        uint32_t value = (uint32_t)*p | ((uint32_t)*(p+1) << 8) | ((uint32_t)*(p+2) << 16) | ((uint32_t)*(p+3) << 24);
+        p += 4;
+        return value;
+    }
+};
diff --git a/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp b/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp
index 56e673217967fa..cf2cbf5df66934 100644
--- a/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp
+++ b/src/coreclr/nativeaot/Runtime/RestrictedCallouts.cpp
@@ -22,7 +22,6 @@
#include "MethodTable.h"
#include "ObjectLayout.h"
#include "event.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
index ce16c44a514c23..cbb72fd1aead03 100644
--- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
+++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
@@ -21,7 +21,6 @@
#include "shash.h"
#include "TypeManager.h"
#include "MethodTable.h"
-#include "varint.h"
#include "CommonMacros.inl"
#include "slist.inl"
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index 78a39612f907c0..f9a730cd872d74 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -14,7 +14,6 @@
#include "RedhawkWarnings.h"
#include "rhassert.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/TypeManager.cpp b/src/coreclr/nativeaot/Runtime/TypeManager.cpp
index 96dc357136d90a..cb993dfd74ffa3 100644
--- a/src/coreclr/nativeaot/Runtime/TypeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/TypeManager.cpp
@@ -10,7 +10,6 @@
#include "rhassert.h"
#include "slist.h"
#include "shash.h"
-#include "varint.h"
#include "rhbinder.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
diff --git a/src/coreclr/nativeaot/Runtime/event.cpp b/src/coreclr/nativeaot/Runtime/event.cpp
index 05a511510e385c..e53875d16318e1 100644
--- a/src/coreclr/nativeaot/Runtime/event.cpp
+++ b/src/coreclr/nativeaot/Runtime/event.cpp
@@ -9,7 +9,6 @@
#include "PalRedhawk.h"
#include "rhassert.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp b/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp
index 2acf81392c2b00..d2cdf011ba2a99 100644
--- a/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp
+++ b/src/coreclr/nativeaot/Runtime/eventtrace_gcheap.cpp
@@ -13,7 +13,6 @@
#include "daccess.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/inc/varint.h b/src/coreclr/nativeaot/Runtime/inc/varint.h
deleted file mode 100644
index e5d0853bf48ffa..00000000000000
--- a/src/coreclr/nativeaot/Runtime/inc/varint.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-class VarInt
-{
-public:
- static uint32_t ReadUnsigned(PTR_uint8_t & pbEncoding)
- {
- uintptr_t lengthBits = *pbEncoding & 0x0F;
- size_t negLength = s_negLengthTab[lengthBits];
- uintptr_t shift = s_shiftTab[lengthBits];
- uint32_t result = *(PTR_uint32_t)(pbEncoding - negLength - 4);
-
- result >>= shift;
- pbEncoding -= negLength;
-
- return result;
- }
-
-private:
- static int8_t s_negLengthTab[16];
- static uint8_t s_shiftTab[16];
-};
-
-#ifndef __GNUC__
-__declspec(selectany)
-#endif
-int8_t
-#ifdef __GNUC__
-__attribute__((weak))
-#endif
-VarInt::s_negLengthTab[16] =
-{
- -1, // 0
- -2, // 1
- -1, // 2
- -3, // 3
-
- -1, // 4
- -2, // 5
- -1, // 6
- -4, // 7
-
- -1, // 8
- -2, // 9
- -1, // 10
- -3, // 11
-
- -1, // 12
- -2, // 13
- -1, // 14
- -5, // 15
-};
-
-#ifndef __GNUC__
-__declspec(selectany)
-#endif
-uint8_t
-#ifdef __GNUC__
-__attribute__((weak))
-#endif
-VarInt::s_shiftTab[16] =
-{
- 32-7*1, // 0
- 32-7*2, // 1
- 32-7*1, // 2
- 32-7*3, // 3
-
- 32-7*1, // 4
- 32-7*2, // 5
- 32-7*1, // 6
- 32-7*4, // 7
-
- 32-7*1, // 8
- 32-7*2, // 9
- 32-7*1, // 10
- 32-7*3, // 11
-
- 32-7*1, // 12
- 32-7*2, // 13
- 32-7*1, // 14
- 0, // 15
-};
diff --git a/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp b/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp
index 5a1dfe10f96b20..e3f1ca24cee3fa 100644
--- a/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp
+++ b/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp
@@ -18,7 +18,6 @@
#include "MethodTable.h"
#include "ObjectLayout.h"
#include "event.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp
index f11a678e9027ee..ec992bc0ac4708 100644
--- a/src/coreclr/nativeaot/Runtime/portable.cpp
+++ b/src/coreclr/nativeaot/Runtime/portable.cpp
@@ -13,7 +13,6 @@
#include "slist.h"
#include "shash.h"
-#include "varint.h"
#include "holder.h"
#include "rhbinder.h"
#include "Crst.h"
diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp
index dffebe8bbde3c9..7bf6745066de45 100644
--- a/src/coreclr/nativeaot/Runtime/startup.cpp
+++ b/src/coreclr/nativeaot/Runtime/startup.cpp
@@ -11,7 +11,6 @@
#include "PalRedhawk.h"
#include "rhassert.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/stressLog.cpp b/src/coreclr/nativeaot/Runtime/stressLog.cpp
index 97c0a2615f9501..c1461cd284de88 100644
--- a/src/coreclr/nativeaot/Runtime/stressLog.cpp
+++ b/src/coreclr/nativeaot/Runtime/stressLog.cpp
@@ -21,7 +21,6 @@
#include "Crst.h"
#include "rhassert.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp
index a1a774a63ec691..0f269f23cbd58f 100644
--- a/src/coreclr/nativeaot/Runtime/thread.cpp
+++ b/src/coreclr/nativeaot/Runtime/thread.cpp
@@ -15,7 +15,6 @@
#include "PalRedhawk.h"
#include "rhassert.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp
index dbd321ffcff4e6..ddc5ef2eee0285 100644
--- a/src/coreclr/nativeaot/Runtime/threadstore.cpp
+++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp
@@ -10,7 +10,6 @@
#include "PalRedhawk.h"
#include "rhassert.h"
#include "slist.h"
-#include "varint.h"
#include "regdisplay.h"
#include "StackFrameIterator.h"
#include "thread.h"
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
index 231be716316475..79e0548199c2d9 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
@@ -9,7 +9,7 @@
#include "regdisplay.h"
#include "ICodeManager.h"
#include "UnixNativeCodeManager.h"
-#include "varint.h"
+#include "NativePrimitiveDecoder.h"
#include "holder.h"
#include "CommonMacros.inl"
@@ -1421,7 +1421,7 @@ bool UnixNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMet
pEnumState->pMethodStartAddress = dac_cast(pNativeMethodInfo->pMethodStartAddress);
pEnumState->pEHInfo = dac_cast(p + *dac_cast(p));
pEnumState->uClause = 0;
- pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEnumState->nClauses = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
return true;
}
@@ -1439,9 +1439,9 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE
pEnumState->uClause++;
- pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_tryStartOffset = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
- uint32_t tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ uint32_t tryEndDeltaAndClauseKind = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3);
pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2);
@@ -1457,23 +1457,23 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE
switch (pEHClauseOut->m_clauseKind)
{
case EH_CLAUSE_TYPED:
- pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
// Read target type
{
// @TODO: Compress EHInfo using type table index scheme
// https://github.com/dotnet/corert/issues/972
- int32_t typeRelAddr = *((PTR_int32_t&)pEnumState->pEHInfo);
- pEHClauseOut->m_pTargetType = dac_cast(pEnumState->pEHInfo + typeRelAddr);
- pEnumState->pEHInfo += 4;
+ uint8_t* pBase = pEnumState->pEHInfo;
+ int32_t typeRelAddr = NativePrimitiveDecoder::ReadInt32(pEnumState->pEHInfo);
+ pEHClauseOut->m_pTargetType = dac_cast(pBase + typeRelAddr);
}
break;
case EH_CLAUSE_FAULT:
- pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
break;
case EH_CLAUSE_FILTER:
- pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo);
- pEHClauseOut->m_filterAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_filterAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
break;
default:
UNREACHABLE_MSG("unexpected EHClauseKind");
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp
index af1a7185866120..6a9435d58c0b83 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp
@@ -175,7 +175,7 @@ struct Registers_REGDISPLAY : REGDISPLAY
inline bool validFloatRegister(int) { return false; }
inline bool validVectorRegister(int) { return false; }
- inline static int lastDwarfRegNum() { return 16; }
+ static constexpr int lastDwarfRegNum() { return 16; }
inline bool validRegister(int regNum) const
{
@@ -297,7 +297,7 @@ struct Registers_REGDISPLAY : REGDISPLAY
inline bool validFloatRegister(int) { return false; }
inline bool validVectorRegister(int) { return false; }
- inline static int lastDwarfRegNum() { return 16; }
+ static constexpr int lastDwarfRegNum() { return 16; }
inline bool validRegister(int regNum) const
{
@@ -336,7 +336,7 @@ struct Registers_REGDISPLAY : REGDISPLAY
struct Registers_REGDISPLAY : REGDISPLAY
{
inline static int getArch() { return libunwind::REGISTERS_ARM; }
- inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; }
+ static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; }
bool validRegister(int num) const;
bool validFloatRegister(int num) const;
@@ -533,7 +533,7 @@ void Registers_REGDISPLAY::setFloatRegister(int num, double value)
struct Registers_REGDISPLAY : REGDISPLAY
{
inline static int getArch() { return libunwind::REGISTERS_ARM64; }
- inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; }
+ static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; }
bool validRegister(int num) const;
bool validFloatRegister(int num) { return false; };
@@ -816,7 +816,7 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value)
struct Registers_REGDISPLAY : REGDISPLAY
{
inline static int getArch() { return libunwind::REGISTERS_LOONGARCH; }
- inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; }
+ static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; }
bool validRegister(int num) const;
bool validFloatRegister(int num) { return false; };
@@ -1096,7 +1096,7 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value)
struct Registers_REGDISPLAY : REGDISPLAY
{
inline static int getArch() { return libunwind::REGISTERS_RISCV; }
- inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; }
+ static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; }
bool validRegister(int num) const;
bool validFloatRegister(int num) { return false; };
diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
index a6437b56ac7b80..1b95e27caad563 100644
--- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
@@ -11,7 +11,7 @@
#include "regdisplay.h"
#include "ICodeManager.h"
#include "CoffNativeCodeManager.h"
-#include "varint.h"
+#include "NativePrimitiveDecoder.h"
#include "holder.h"
#include "CommonMacros.inl"
@@ -1044,7 +1044,7 @@ bool CoffNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMet
pEnumState->pMethodStartAddress = dac_cast(*pMethodStartAddress);
pEnumState->pEHInfo = dac_cast(m_moduleBase + *dac_cast(p));
pEnumState->uClause = 0;
- pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEnumState->nClauses = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
return true;
}
@@ -1059,9 +1059,9 @@ bool CoffNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE
return false;
pEnumState->uClause++;
- pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_tryStartOffset = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
- uint32_t tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ uint32_t tryEndDeltaAndClauseKind = NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3);
pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2);
@@ -1077,22 +1077,22 @@ bool CoffNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE
switch (pEHClauseOut->m_clauseKind)
{
case EH_CLAUSE_TYPED:
- pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
// Read target type
{
// @TODO: Compress EHInfo using type table index scheme
// https://github.com/dotnet/corert/issues/972
- uint32_t typeRVA = *((PTR_uint32_t&)pEnumState->pEHInfo)++;
+ uint32_t typeRVA = NativePrimitiveDecoder::ReadUInt32(pEnumState->pEHInfo);
pEHClauseOut->m_pTargetType = dac_cast(m_moduleBase + typeRVA);
}
break;
case EH_CLAUSE_FAULT:
- pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
break;
case EH_CLAUSE_FILTER:
- pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo);
- pEHClauseOut->m_filterAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
+ pEHClauseOut->m_filterAddress = pEnumState->pMethodStartAddress + NativePrimitiveDecoder::ReadUnsigned(pEnumState->pEHInfo);
break;
default:
UNREACHABLE_MSG("unexpected EHClauseKind");
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml b/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml
index 0abedd830a6b14..2de459555d114d 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/CompatibilitySuppressions.xml
@@ -697,10 +697,6 @@
CP0001
T:Internal.Runtime.Augments.TypeLoaderCallbacks
-
- CP0001
- T:Internal.Runtime.CanonTypeKind
-
CP0001
T:Internal.Runtime.CompilerServices.FunctionPointerOps
@@ -717,18 +713,6 @@
CP0001
T:Internal.Runtime.CompilerServices.OpenMethodResolver
-
- CP0001
- T:Internal.Runtime.CompilerServices.RuntimeFieldHandleInfo
-
-
- CP0001
- T:Internal.Runtime.CompilerServices.RuntimeMethodHandleInfo
-
-
- CP0001
- T:Internal.Runtime.CompilerServices.RuntimeSignature
-
CP0001
T:Internal.Runtime.TypeManagerHandle
@@ -753,6 +737,14 @@
CP0001
T:System.MDArray
+
+ CP0001
+ T:System.FieldHandleInfo
+
+
+ CP0001
+ T:System.MethodHandleInfo
+
CP0001
T:System.Reflection.BinderBundle
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs
index 6202cd45be89d5..33ebf9886745f0 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Augments/ReflectionAugments.cs
@@ -103,21 +103,6 @@ public static Assembly Load(AssemblyName assemblyRef, bool throwOnFileNotFound)
return RuntimeAssemblyInfo.GetRuntimeAssemblyIfExists(assemblyRef.ToRuntimeAssemblyName());
}
- public static Assembly Load(ReadOnlySpan rawAssembly, ReadOnlySpan pdbSymbolStore)
- {
- if (rawAssembly.IsEmpty)
- throw new ArgumentNullException(nameof(rawAssembly));
-
- return RuntimeAssemblyInfo.GetRuntimeAssemblyFromByteArray(rawAssembly, pdbSymbolStore);
- }
-
- public static Assembly Load(string assemblyPath)
- {
- ArgumentNullException.ThrowIfNull(assemblyPath);
-
- return RuntimeAssemblyInfo.GetRuntimeAssemblyFromPath(assemblyPath);
- }
-
//
// This overload of GetMethodForHandle only accepts handles for methods declared on non-generic types (the method, however,
// can be an instance of a generic method.) To resolve handles for methods declared on generic types, you must pass
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs
index a59701eb8c85bf..cee8abb23ab324 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/AssemblyBinder.cs
@@ -19,7 +19,6 @@ public partial struct AssemblyBindResult
{
public MetadataReader Reader;
public ScopeDefinitionHandle ScopeDefinitionHandle;
- public IEnumerable OverflowScopes;
}
//
@@ -33,10 +32,6 @@ public abstract class AssemblyBinder
{
public abstract bool Bind(RuntimeAssemblyName refName, bool cacheMissedLookups, out AssemblyBindResult result, out Exception exception);
- public abstract bool Bind(ReadOnlySpan rawAssembly, ReadOnlySpan rawSymbolStore, out AssemblyBindResult result, out Exception exception);
-
- public abstract bool Bind(string assemblyPath, out AssemblyBindResult bindResult, out Exception exception);
-
public abstract IList GetLoadedAssemblies();
}
}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs
index d592f7506f7aec..a473c80470f6fc 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs
@@ -483,7 +483,7 @@ public static IntPtr NewInterfaceDispatchCell(RuntimeTypeHandle interfaceTypeHan
}
[Intrinsic]
- public static RuntimeTypeHandle GetCanonType(CanonTypeKind kind)
+ public static RuntimeTypeHandle GetCanonType()
{
// Compiler needs to expand this. This is not expressible in IL.
throw new NotSupportedException();
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs
index 7dc7641435e5e9..c7b5203ffebd4f 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/TypeLoaderCallbacks.cs
@@ -19,13 +19,10 @@ public abstract class TypeLoaderCallbacks
public abstract bool TryGetConstructedGenericTypeForComponents(RuntimeTypeHandle genericTypeDefinitionHandle, RuntimeTypeHandle[] genericTypeArgumentHandles, out RuntimeTypeHandle runtimeTypeHandle);
public abstract IntPtr GetThreadStaticGCDescForDynamicType(TypeManagerHandle handle, int index);
public abstract IntPtr GenericLookupFromContextAndSignature(IntPtr context, IntPtr signature, out IntPtr auxResult);
- public abstract bool GetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs);
- public abstract RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string methodName, RuntimeSignature methodSignature, RuntimeTypeHandle[] genericMethodArgs);
- public abstract bool CompareMethodSignatures(RuntimeSignature signature1, RuntimeSignature signature2);
+ public abstract RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTypeHandle declaringTypeHandle, MethodHandle handle, RuntimeTypeHandle[] genericMethodArgs);
public abstract IntPtr TryGetDefaultConstructorForType(RuntimeTypeHandle runtimeTypeHandle);
public abstract IntPtr ResolveGenericVirtualMethodTarget(RuntimeTypeHandle targetTypeHandle, RuntimeMethodHandle declMethod);
- public abstract bool GetRuntimeFieldHandleComponents(RuntimeFieldHandle runtimeFieldHandle, out RuntimeTypeHandle declaringTypeHandle, out string fieldName);
- public abstract RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, string fieldName);
+ public abstract RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeHandle declaringTypeHandle, FieldHandle handle);
public abstract bool TryGetPointerTypeForTargetType(RuntimeTypeHandle pointeeTypeHandle, out RuntimeTypeHandle pointerTypeHandle);
public abstract bool TryGetArrayTypeForElementType(RuntimeTypeHandle elementTypeHandle, bool isMdArray, int rank, out RuntimeTypeHandle arrayTypeHandle);
///
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/MethodNameAndSignature.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/MethodNameAndSignature.cs
new file mode 100644
index 00000000000000..04884e5ac54506
--- /dev/null
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/MethodNameAndSignature.cs
@@ -0,0 +1,57 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics;
+
+using Internal.Metadata.NativeFormat;
+
+namespace Internal.Runtime.CompilerServices
+{
+ [CLSCompliant(false)]
+ public class MethodNameAndSignature
+ {
+ public MetadataReader Reader { get; }
+ public MethodHandle Handle { get; }
+
+ public MethodNameAndSignature(MetadataReader reader, MethodHandle handle)
+ {
+ Reader = reader;
+ Handle = handle;
+ }
+
+ public string GetName()
+ {
+ Method method = Reader.GetMethod(Handle);
+ return Reader.GetString(method.Name);
+ }
+
+ public override bool Equals(object? compare)
+ {
+ if (compare == null)
+ return false;
+
+ MethodNameAndSignature? other = compare as MethodNameAndSignature;
+ if (other == null)
+ return false;
+
+ // Comparing handles is enough if there's only one metadata blob
+ // (Same assumption in GetHashCode below!)
+ Debug.Assert(Reader == other.Reader);
+
+ Method thisMethod = Reader.GetMethod(Handle);
+ Method otherMethod = other.Reader.GetMethod(other.Handle);
+
+ return thisMethod.Signature.Equals(otherMethod.Signature)
+ && thisMethod.Name.Equals(otherMethod.Name);
+ }
+
+ public override int GetHashCode()
+ {
+ Method method = Reader.GetMethod(Handle);
+
+ // Assumes we only have one metadata blob
+ return method.Signature.GetHashCode() ^ method.Name.GetHashCode();
+ }
+ }
+}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeFieldHandleInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeFieldHandleInfo.cs
deleted file mode 100644
index 53f4d7e9ac2585..00000000000000
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeFieldHandleInfo.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Runtime.InteropServices;
-
-namespace Internal.Runtime.CompilerServices
-{
- [StructLayout(LayoutKind.Sequential)]
- [CLSCompliant(false)]
- public unsafe struct RuntimeFieldHandleInfo
- {
- public IntPtr NativeLayoutInfoSignature;
- }
-}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeMethodHandleInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeMethodHandleInfo.cs
deleted file mode 100644
index 41aa17cdcbfdae..00000000000000
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeMethodHandleInfo.cs
+++ /dev/null
@@ -1,53 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Diagnostics;
-using System.Reflection;
-using System.Runtime.InteropServices;
-
-using Internal.Runtime.Augments;
-
-namespace Internal.Runtime.CompilerServices
-{
- public class MethodNameAndSignature
- {
- public string Name { get; }
- public RuntimeSignature Signature { get; }
-
- public MethodNameAndSignature(string name, RuntimeSignature signature)
- {
- Name = name;
- Signature = signature;
- }
-
- public override bool Equals(object? compare)
- {
- if (compare == null)
- return false;
-
- MethodNameAndSignature? other = compare as MethodNameAndSignature;
- if (other == null)
- return false;
-
- if (Name != other.Name)
- return false;
-
- return Signature.Equals(other.Signature);
- }
-
- public override int GetHashCode()
- {
- int hash = Name.GetHashCode();
-
- return hash;
- }
- }
-
- [StructLayout(LayoutKind.Sequential)]
- [CLSCompliant(false)]
- public unsafe struct RuntimeMethodHandleInfo
- {
- public IntPtr NativeLayoutInfoSignature;
- }
-}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeSignature.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeSignature.cs
deleted file mode 100644
index 28fc165de7df99..00000000000000
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/RuntimeSignature.cs
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Diagnostics;
-using System.Runtime;
-
-using Internal.Runtime.Augments;
-
-namespace Internal.Runtime.CompilerServices
-{
- public struct RuntimeSignature
- {
- private IntPtr _moduleHandle;
- private int _tokenOrOffset;
- private bool _isNativeLayoutSignature;
-
- [CLSCompliant(false)]
- public static RuntimeSignature CreateFromNativeLayoutSignature(TypeManagerHandle moduleHandle, uint nativeLayoutOffset)
- {
- return new RuntimeSignature
- {
- _moduleHandle = moduleHandle.GetIntPtrUNSAFE(),
- _tokenOrOffset = (int)nativeLayoutOffset,
- _isNativeLayoutSignature = true,
- };
- }
-
- [CLSCompliant(false)]
- public static RuntimeSignature CreateFromNativeLayoutSignature(RuntimeSignature oldSignature, uint newNativeLayoutOffset)
- {
- return new RuntimeSignature
- {
- _moduleHandle = oldSignature._moduleHandle,
- _tokenOrOffset = (int)newNativeLayoutOffset,
- _isNativeLayoutSignature = true,
- };
- }
-
- public static RuntimeSignature CreateFromMethodHandle(TypeManagerHandle moduleHandle, int token)
- {
- return new RuntimeSignature
- {
- _moduleHandle = moduleHandle.GetIntPtrUNSAFE(),
- _tokenOrOffset = token,
- _isNativeLayoutSignature = false,
- };
- }
-
- public bool IsNativeLayoutSignature
- {
- get
- {
- return _isNativeLayoutSignature;
- }
- }
-
- public int Token
- {
- get
- {
- if (_isNativeLayoutSignature)
- {
- Debug.Assert(false);
- return -1;
- }
- return _tokenOrOffset;
- }
- }
-
- [CLSCompliant(false)]
- public uint NativeLayoutOffset
- {
- get
- {
- if (!_isNativeLayoutSignature)
- {
- Debug.Assert(false);
- return unchecked((uint)-1);
- }
- return (uint)_tokenOrOffset;
- }
- }
-
- public IntPtr ModuleHandle
- {
- get
- {
- return _moduleHandle;
- }
- }
-
- public bool Equals(RuntimeSignature other)
- {
- if (IsNativeLayoutSignature && other.IsNativeLayoutSignature)
- {
- if ((ModuleHandle == other.ModuleHandle) && (NativeLayoutOffset == other.NativeLayoutOffset))
- return true;
- }
- else if (!IsNativeLayoutSignature && !other.IsNativeLayoutSignature)
- {
- if ((ModuleHandle == other.ModuleHandle) && (Token == other.Token))
- return true;
- }
-
- // Walk both signatures to check for equality the slow way
- return RuntimeAugments.TypeLoaderCallbacks.CompareMethodSignatures(this, other);
- }
-
- ///
- /// Fast equality check
- ///
- public bool StructuralEquals(RuntimeSignature other)
- {
- if (_moduleHandle != other._moduleHandle)
- return false;
-
- if (_tokenOrOffset != other._tokenOrOffset)
- return false;
-
- if (_isNativeLayoutSignature != other._isNativeLayoutSignature)
- return false;
-
- return true;
- }
- }
-}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj
index 71646b278f0f3c..72df9073427635 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj
@@ -119,9 +119,7 @@
-
-
-
+
@@ -355,9 +353,6 @@
Internal\LowLevelLinq\LowLevelEnumerable.ToArray.cs
-
- Internal\Runtime\CanonTypeKind.cs
-
System\Runtime\RhFailFastReason.cs
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs
index a6a183fa73cd84..20dbd1206ef754 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseInsensitive.cs
@@ -72,36 +72,33 @@ private LowLevelDictionary CreateCaseInsensitiveTypeDictionary(
LowLevelDictionary dict = new LowLevelDictionary();
- foreach (QScopeDefinition scope in AllScopes)
+ MetadataReader reader = Scope.Reader;
+ ScopeDefinition scopeDefinition = Scope.ScopeDefinition;
+ IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition };
+ IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles);
+ foreach (NamespaceDefinitionHandle namespaceHandle in allNamespaceHandles)
{
- MetadataReader reader = scope.Reader;
- ScopeDefinition scopeDefinition = scope.ScopeDefinition;
- IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition };
- IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles);
- foreach (NamespaceDefinitionHandle namespaceHandle in allNamespaceHandles)
- {
- string ns = namespaceHandle.ToNamespaceName(reader);
- if (ns.Length != 0)
- ns += ".";
- ns = ns.ToLowerInvariant();
+ string ns = namespaceHandle.ToNamespaceName(reader);
+ if (ns.Length != 0)
+ ns += ".";
+ ns = ns.ToLowerInvariant();
- NamespaceDefinition namespaceDefinition = namespaceHandle.GetNamespaceDefinition(reader);
- foreach (TypeDefinitionHandle typeDefinitionHandle in namespaceDefinition.TypeDefinitions)
+ NamespaceDefinition namespaceDefinition = namespaceHandle.GetNamespaceDefinition(reader);
+ foreach (TypeDefinitionHandle typeDefinitionHandle in namespaceDefinition.TypeDefinitions)
+ {
+ string fullName = ns + typeDefinitionHandle.GetTypeDefinition(reader).Name.GetString(reader).ToLowerInvariant();
+ if (!dict.TryGetValue(fullName, out _))
{
- string fullName = ns + typeDefinitionHandle.GetTypeDefinition(reader).Name.GetString(reader).ToLowerInvariant();
- if (!dict.TryGetValue(fullName, out _))
- {
- dict.Add(fullName, new QHandle(reader, typeDefinitionHandle));
- }
+ dict.Add(fullName, new QHandle(reader, typeDefinitionHandle));
}
+ }
- foreach (TypeForwarderHandle typeForwarderHandle in namespaceDefinition.TypeForwarders)
+ foreach (TypeForwarderHandle typeForwarderHandle in namespaceDefinition.TypeForwarders)
+ {
+ string fullName = ns + typeForwarderHandle.GetTypeForwarder(reader).Name.GetString(reader).ToLowerInvariant();
+ if (!dict.TryGetValue(fullName, out _))
{
- string fullName = ns + typeForwarderHandle.GetTypeForwarder(reader).Name.GetString(reader).ToLowerInvariant();
- if (!dict.TryGetValue(fullName, out _))
- {
- dict.Add(fullName, new QHandle(reader, typeForwarderHandle));
- }
+ dict.Add(fullName, new QHandle(reader, typeForwarderHandle));
}
}
}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs
index 34a045281fd52f..e52605d1d45cee 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.GetTypeCore.CaseSensitive.cs
@@ -21,15 +21,12 @@ internal sealed override RuntimeTypeInfo UncachedGetTypeCoreCaseSensitive(string
namespaceParts[numNamespaceParts - i - 1] = parts[i];
string name = parts[numNamespaceParts];
- foreach (QScopeDefinition scopeDefinition in AllScopes)
- {
- MetadataReader reader = scopeDefinition.Reader;
- ScopeDefinitionHandle scopeDefinitionHandle = scopeDefinition.Handle;
-
- NamespaceDefinition namespaceDefinition;
- if (!TryResolveNamespaceDefinitionCaseSensitive(reader, namespaceParts, scopeDefinitionHandle, out namespaceDefinition))
- continue;
+ MetadataReader reader = Scope.Reader;
+ ScopeDefinitionHandle scopeDefinitionHandle = Scope.Handle;
+ NamespaceDefinition namespaceDefinition;
+ if (TryResolveNamespaceDefinitionCaseSensitive(reader, namespaceParts, scopeDefinitionHandle, out namespaceDefinition))
+ {
// We've successfully drilled down the namespace chain. Now look for a top-level type matching the type name.
TypeDefinitionHandleCollection candidateTypes = namespaceDefinition.TypeDefinitions;
foreach (TypeDefinitionHandle candidateType in candidateTypes)
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs
index 3e26daa4672511..2ca7967bcffc5b 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Assemblies/NativeFormat/NativeFormatRuntimeAssembly.cs
@@ -20,21 +20,17 @@ namespace System.Reflection.Runtime.Assemblies.NativeFormat
{
internal sealed partial class NativeFormatRuntimeAssembly : RuntimeAssemblyInfo
{
- private NativeFormatRuntimeAssembly(MetadataReader reader, ScopeDefinitionHandle scope, IEnumerable overflowScopes)
+ private NativeFormatRuntimeAssembly(MetadataReader reader, ScopeDefinitionHandle scope)
{
Scope = new QScopeDefinition(reader, scope);
- OverflowScopes = overflowScopes;
}
public sealed override IEnumerable CustomAttributes
{
get
{
- foreach (QScopeDefinition scope in AllScopes)
- {
- foreach (CustomAttributeData cad in RuntimeCustomAttributeData.GetCustomAttributes(scope.Reader, scope.ScopeDefinition.CustomAttributes))
- yield return cad;
- }
+ foreach (CustomAttributeData cad in RuntimeCustomAttributeData.GetCustomAttributes(Scope.Reader, Scope.ScopeDefinition.CustomAttributes))
+ yield return cad;
}
}
@@ -43,17 +39,14 @@ public sealed override IEnumerable DefinedTypes
[RequiresUnreferencedCode("Types might be removed")]
get
{
- foreach (QScopeDefinition scope in AllScopes)
- {
- MetadataReader reader = scope.Reader;
- ScopeDefinition scopeDefinition = scope.ScopeDefinition;
- IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition };
- IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles);
- IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles);
- IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: false);
- foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes)
- yield return (TypeInfo)typeDefinitionHandle.GetNamedType(reader).ToType();
- }
+ MetadataReader reader = Scope.Reader;
+ ScopeDefinition scopeDefinition = Scope.ScopeDefinition;
+ IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition };
+ IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles);
+ IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles);
+ IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: false);
+ foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes)
+ yield return (TypeInfo)typeDefinitionHandle.GetNamedType(reader).ToType();
}
}
@@ -62,17 +55,14 @@ public sealed override IEnumerable ExportedTypes
[RequiresUnreferencedCode("Types might be removed")]
get
{
- foreach (QScopeDefinition scope in AllScopes)
- {
- MetadataReader reader = scope.Reader;
- ScopeDefinition scopeDefinition = scope.ScopeDefinition;
- IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition };
- IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles);
- IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles);
- IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: true);
- foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes)
- yield return typeDefinitionHandle.ResolveTypeDefinition(reader).ToType();
- }
+ MetadataReader reader = Scope.Reader;
+ ScopeDefinition scopeDefinition = Scope.ScopeDefinition;
+ IEnumerable topLevelNamespaceHandles = new NamespaceDefinitionHandle[] { scopeDefinition.RootNamespaceDefinition };
+ IEnumerable allNamespaceHandles = reader.GetTransitiveNamespaces(topLevelNamespaceHandles);
+ IEnumerable allTopLevelTypes = reader.GetTopLevelTypes(allNamespaceHandles);
+ IEnumerable allTypes = reader.GetTransitiveTypes(allTopLevelTypes, publicOnly: true);
+ foreach (TypeDefinitionHandle typeDefinitionHandle in allTypes)
+ yield return typeDefinitionHandle.ResolveTypeDefinition(reader).ToType();
}
}
@@ -80,22 +70,16 @@ public sealed override MethodInfo EntryPoint
{
get
{
- // The scope that defines metadata for the owning type of the entrypoint will be the one
- // to carry the entrypoint token information. Find it by iterating over all scopes.
+ MetadataReader reader = Scope.Reader;
- foreach (QScopeDefinition scope in AllScopes)
+ QualifiedMethodHandle entrypointHandle = Scope.ScopeDefinition.EntryPoint;
+ if (!entrypointHandle.IsNil)
{
- MetadataReader reader = scope.Reader;
-
- QualifiedMethodHandle entrypointHandle = scope.ScopeDefinition.EntryPoint;
- if (!entrypointHandle.IsNil)
- {
- QualifiedMethod entrypointMethod = entrypointHandle.GetQualifiedMethod(reader);
- TypeDefinitionHandle declaringTypeHandle = entrypointMethod.EnclosingType;
- MethodHandle methodHandle = entrypointMethod.Method;
- NativeFormatRuntimeNamedTypeInfo containingType = NativeFormatRuntimeNamedTypeInfo.GetRuntimeNamedTypeInfo(reader, declaringTypeHandle, default(RuntimeTypeHandle));
- return RuntimeNamedMethodInfo.GetRuntimeNamedMethodInfo(new NativeFormatMethodCommon(methodHandle, containingType, containingType), containingType);
- }
+ QualifiedMethod entrypointMethod = entrypointHandle.GetQualifiedMethod(reader);
+ TypeDefinitionHandle declaringTypeHandle = entrypointMethod.EnclosingType;
+ MethodHandle methodHandle = entrypointMethod.Method;
+ NativeFormatRuntimeNamedTypeInfo containingType = NativeFormatRuntimeNamedTypeInfo.GetRuntimeNamedTypeInfo(reader, declaringTypeHandle, default(RuntimeTypeHandle));
+ return RuntimeNamedMethodInfo