Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
xxAny, xxAll comparisons, part 2.
  • Loading branch information
jandupej committed Mar 15, 2023
commit f317b0af848e27ec686df1ec2bea6db3a2243068
46 changes: 19 additions & 27 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1139,19 +1139,16 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_dup_g_4s(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))
#define arm_neon_dup_g_2d(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))

// the opcode is smov, but we define variants smovs and smovd by whether they fill a 32 or 64-bit reg.
#define arm_neon_smovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))

// the opcode is umov, but we define variants smovs and smovd by whether they fill a 32 or 64-bit reg.
#define arm_neon_umovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
#define arm_neon_smov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))
#define arm_neon_smov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b01000 | ((index) << 4), 0b0101, (rd), (rn))

#define arm_neon_umov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
#define arm_neon_umov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b01000 | ((index) << 4), 0b0111, (rd), (rn))


/* NEON :: 3-register same FP16 */
// TODO
Expand Down Expand Up @@ -2316,6 +2313,15 @@ arm_encode_arith_imm (int imm, guint32 *shift)
arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
} while (0)

#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
int32_t ___temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
} while (0)

#define arm_neon_sli(p, width, type, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn), (shift))
#define arm_neon_shrn(p, type, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, (type), 0b10000, (rd), (rn), (shift))

#define arm_neon_sshr_8b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
#define arm_neon_sshr_16b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
#define arm_neon_sshr_4h(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b00000, (rd), (rn), (shift))
Expand Down Expand Up @@ -2348,12 +2354,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_srsra_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_4, 0b00110, (rd), (rn), (shift))
#define arm_neon_srsra_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_8, 0b00110, (rd), (rn), (shift))

#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
int32_t ___temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
} while (0)

#define arm_neon_shl_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_shl_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_shl_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b01010, (rd), (rn), (shift))
Expand Down Expand Up @@ -2457,14 +2457,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_sri_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01000, (rd), (rn), (shift))
#define arm_neon_sri_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01000, (rd), (rn), (shift))

#define arm_neon_sli_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_8h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_2s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_4s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_2d(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01010, (rd), (rn), (shift))

#define arm_neon_sqshlu_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
#define arm_neon_sqshlu_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
#define arm_neon_sqshlu_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01100, (rd), (rn), (shift))
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1479,13 +1479,18 @@ MINI_OP(OP_XCOMPARE_SCALAR, "xcompare_scalar", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_FP, "xcompare_fp", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_FP_SCALAR, "xcompare_fp_scalar", XREG, XREG, XREG)

/* Extract mask from XREG into LREG.
* inst_c0 - specific instruction, one of SIMD_EXTRMASKL_... */
MINI_OP(OP_XEXTRMASK_I8, "xextrmask_i8", LREG, XREG, NONE)

/*
* Generic SIMD operations, the rest of the JIT doesn't care about the exact operation.
*/
MINI_OP(OP_XBINOP, "xbinop", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_FORCEINT, "xbinop_forceint", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_SCALAR, "xbinop_scalar", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_BYSCALAR, "xbinop_byscalar", XREG, XREG, XREG)

/* inst_c0 contains an INTRINS_ enum, inst_c1 might contain additional data */
MINI_OP(OP_XOP, "xop", NONE, NONE, NONE)
MINI_OP(OP_XOP_X_I, "xop_x_i", XREG, IREG, NONE)
Expand Down
11 changes: 6 additions & 5 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <mono/metadata/reflection-internals.h>
#include <mono/utils/mono-hwcap.h>

#if defined (MONO_ARCH_SIMD_INTRINSICS)
#if TRUE//defined (MONO_ARCH_SIMD_INTRINSICS)

#if defined(DISABLE_JIT)

Expand Down Expand Up @@ -1190,10 +1190,10 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
return NULL;
#endif
// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64
#ifdef TARGET_ARM64
if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp))
return NULL;
#endif
//#ifdef TARGET_ARM64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's for testing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to add some kind of option for this into utils/options-def.h:
DEFINE_BOOL(experimental_jit_simd, "experimental-jit-simd", FALSE,"")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea. This is now tracked in #83587

// if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp))
// return NULL;
//#endif

int id = lookup_intrins (sri_vector_methods, sizeof (sri_vector_methods), cmethod);
if (id == -1) {
Expand All @@ -1216,6 +1216,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
case SN_LessThanOrEqual:
case SN_Negate:
case SN_OnesComplement:
case SN_GreaterThanAny:
break;
default:
return NULL;
Expand Down
12 changes: 12 additions & 0 deletions src/mono/sample/HelloWorld/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,30 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;

namespace HelloWorld
{
internal class Program
{
[MethodImpl(MethodImplOptions.NoInlining)]
private static int X()
{
Vector128<int> a = Vector128.Create(2,2,3,4);
Vector128<int> b = Vector128.Create(1,2,3,4);
return Vector128.GreaterThanAny(a,b) ? 1 : 0;
}

private static void Main(string[] args)
{
bool isMono = typeof(object).Assembly.GetType("Mono.RuntimeStructs") != null;
Console.WriteLine($"Hello World {(isMono ? "from Mono!" : "from CoreCLR!")}");
Console.WriteLine(typeof(object).Assembly.FullName);
Console.WriteLine(System.Reflection.Assembly.GetEntryAssembly ());
Console.WriteLine(System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription);

Console.WriteLine(X().ToString());
}
}
}