Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18659,10 +18659,45 @@ bool GenTree::isCommutativeHWIntrinsic() const
assert(gtOper == GT_HWINTRINSIC);

#ifdef TARGET_XARCH
return HWIntrinsicInfo::IsCommutative(AsHWIntrinsic()->GetHWIntrinsicId());
#else
return false;
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
NamedIntrinsic id = node->GetHWIntrinsicId();

if (HWIntrinsicInfo::IsCommutative(id))
{
return true;
}

if (HWIntrinsicInfo::IsMaybeCommutative(id))
{
switch (id)
{
case NI_SSE_Max:
case NI_SSE_Min:
{
return false;
}

case NI_SSE2_Max:
case NI_SSE2_Min:
{
return !varTypeIsFloating(node->GetSimdBaseType());
}

case NI_AVX_Max:
case NI_AVX_Min:
{
return false;
}

default:
{
unreached();
}
}
}
Comment on lines +18670 to +18697
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've kept this "simple" since we need to backport into .NET 7.

It isn't just a "return false", however, because we will want to specially handle the case in .NET 8 since certain scenarios can be commutative and this makes the follow up PR significantly simpler.

#endif // TARGET_XARCH

return false;
Copy link
Member Author

@tannergooding tannergooding Sep 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Notably Arm64 is returning constant false and it shouldn't be. Given this needs to be backported to .NET 7, I didn't "fix" this (its not a correctness bug, just a case where we might generate "worse" codegen).

I plan on putting up a follow up PR to ensure Arm64 intrinsics are handled as commutative (they are already correctly marked as required, this function returning false is the blocker for them)

}

bool GenTree::isContainableHWIntrinsic() const
Expand Down
17 changes: 17 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ enum HWIntrinsicFlag : unsigned int
// the intrinsic can be used on hardware with AVX but not AVX2 support
HW_Flag_AvxOnlyCompatible = 0x40000,

// MaybeCommutative
// - if a binary-op intrinsic is maybe commutative (e.g., Max or Min for float/double), its op1 can possibly be
// contained
HW_Flag_MaybeCommutative = 0x80000,

#elif defined(TARGET_ARM64)
// The intrinsic has an immediate operand
// - the value can be (and should be) encoded in a corresponding instruction when the operand value is constant
Expand Down Expand Up @@ -626,6 +631,18 @@ struct HWIntrinsicInfo
return (flags & HW_Flag_Commutative) != 0;
}

static bool IsMaybeCommutative(NamedIntrinsic id)
{
HWIntrinsicFlag flags = lookupFlags(id);
#if defined(TARGET_XARCH)
return (flags & HW_Flag_MaybeCommutative) != 0;
#elif defined(TARGET_ARM64)
return false;
#else
#error Unsupported platform
#endif
}

static bool RequiresCodegen(NamedIntrinsic id)
{
HWIntrinsicFlag flags = lookupFlags(id);
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,9 @@ HARDWARE_INTRINSIC(SSE, LoadHigh,
HARDWARE_INTRINSIC(SSE, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(SSE, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(SSE, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
Expand Down Expand Up @@ -403,10 +403,10 @@ HARDWARE_INTRINSIC(SSE2, LoadLow,
HARDWARE_INTRINSIC(SSE2, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, LoadVector128, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(SSE2, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(SSE2, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
Expand Down Expand Up @@ -598,8 +598,8 @@ HARDWARE_INTRINSIC(AVX, InsertVector128,
HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

public unsafe class ImageSharp_2117
{
public static int Main(string[] args)
{
if (Sse.IsSupported)
{
Vector128<float> fnan = Vector128.Create(float.NaN);
Vector128<float> res1 = Sse.Max(Sse.LoadVector128((float*)(&fnan)), Vector128<float>.Zero);
Vector128<float> res2 = Sse.Min(Sse.LoadVector128((float*)(&fnan)), Vector128<float>.Zero);

if (float.IsNaN(res1[0]) || float.IsNaN(res2[0]))
{
return 0;
}
}

if (Sse2.IsSupported)
{
Vector128<double> dnan = Vector128.Create(double.NaN);
Vector128<double> res3 = Sse2.Max(Sse2.LoadVector128((double*)(&dnan)), Vector128<double>.Zero);
Vector128<double> res4 = Sse2.Min(Sse2.LoadVector128((double*)(&dnan)), Vector128<double>.Zero);

if (double.IsNaN(res3[0]) || double.IsNaN(res4[0]))
{
return 0;
}
}

return 100;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>