@@ -7866,16 +7866,16 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
                 case NI_AVX512F_BroadcastVector128ToVector512:
                 case NI_AVX512F_BroadcastVector256ToVector512:
                 {
+                    assert(!supportsSIMDScalarLoads);
+
                     if (parentNode->OperIsMemoryLoad())
                     {
                         supportsGeneralLoads = !childNode->OperIsHWIntrinsic();
                         break;
                     }
-                    else
-                    {
-                        supportsGeneralLoads = true;
-                        break;
-                    }
+
+                    supportsGeneralLoads = true;
+                    break;
                 }
 
                 case NI_SSE41_ConvertToVector128Int16:
@@ -7941,26 +7941,47 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
                 }
 
                 case NI_SSE2_ConvertToVector128Double:
-                case NI_SSE3_MoveAndDuplicate:
                 case NI_AVX_ConvertToVector256Double:
+                case NI_AVX512F_ConvertToVector512Double:
+                case NI_AVX512F_VL_ConvertToVector128Double:
+                case NI_AVX512F_VL_ConvertToVector256Double:
                 {
                     assert(!supportsSIMDScalarLoads);
 
                     // Most instructions under the non-VEX encoding require aligned operands.
                     // Those used for Sse2.ConvertToVector128Double (CVTDQ2PD and CVTPS2PD)
-                    // and Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't fail for
-                    // unaligned inputs as they read mem64 (half the vector width) instead
+                    // are exceptions and don't fail for unaligned inputs as they read half
+                    // the vector width instead
 
                     supportsAlignedSIMDLoads   = !comp->opts.MinOpts();
                     supportsUnalignedSIMDLoads = true;
 
                     const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2;
                     const unsigned operandSize  = genTypeSize(childNode->TypeGet());
 
-                    // For broadcasts we can only optimize constants and memory operands
-                    const bool broadcastIsContainable = childNode->OperIsConst() || childNode->isMemoryOp();
-                    supportsGeneralLoads =
-                        broadcastIsContainable && supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
+                    if (childNode->OperIsConst() || childNode->isMemoryOp())
+                    {
+                        // For broadcasts we can only optimize constants and memory operands
+                        // since we're going from a smaller base type to a larger base type
+                        supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
+                    }
+                    break;
+                }
+
+                case NI_SSE3_MoveAndDuplicate:
+                {
+                    // Most instructions under the non-VEX encoding require aligned operands.
+                    // Those used for Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't
+                    // fail for unaligned inputs as they read half the vector width instead
+
+                    supportsAlignedSIMDLoads   = !comp->opts.MinOpts();
+                    supportsUnalignedSIMDLoads = true;
+
+                    const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2;
+                    const unsigned operandSize  = genTypeSize(childNode->TypeGet());
+
+                    supportsGeneralLoads    = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
+                    supportsSIMDScalarLoads = true;
                     break;
                 }
 
@@ -7986,8 +8007,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
                     break;
                 }
             }
-
-            assert(supportsSIMDScalarLoads == false);
             break;
         }
 
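
Both rewritten cases hinge on the same size comparison: these instructions read only half of the destination vector's width from memory, so a memory operand can be folded as long as it supplies at least that half-width. Below is a minimal standalone sketch of that check; the helper name and the `main` driver are illustrative only and are not part of the JIT.

```cpp
// Standalone illustration of the expectedSize/operandSize comparison above.
// CanContainHalfWidthLoad is a hypothetical helper, not a JIT function.
#include <cassert>

bool CanContainHalfWidthLoad(unsigned parentVectorSizeInBytes, unsigned operandSizeInBytes)
{
    // The instruction reads only half the destination vector width from memory,
    // so the operand must cover at least that many bytes.
    const unsigned expectedSize = parentVectorSizeInBytes / 2;
    return operandSizeInBytes >= expectedSize;
}

int main()
{
    // CVTPS2PD producing a 16-byte Vector128<double> reads 8 bytes of floats,
    // so an 8-byte memory operand is containable.
    assert(CanContainHalfWidthLoad(16, 8));

    // An 8-byte operand is too small for a 32-byte Vector256<double> result,
    // which needs a 16-byte source.
    assert(!CanContainHalfWidthLoad(32, 8));

    return 0;
}
```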