Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Lower vqrshrun_n
  • Loading branch information
ghehg committed Sep 30, 2024
commit 447d1af4732ca89f3d6e356afa36172f12353b2c
22 changes: 22 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,28 @@ class CIRGenBuilderTy : public CIRBaseBuilderTy {
}
bool isInt(mlir::Type i) { return mlir::isa<mlir::cir::IntType>(i); }

// Return the integer type with double the bit-width of `ty` (8->16, 16->32,
// 32->64), signed or unsigned per `isSigned`. Aborts (NYI) for any other
// input width, including 64-bit.
mlir::cir::IntType getExtendedIntTy(mlir::cir::IntType ty, bool isSigned) {
  if (isSigned) {
    if (isInt8Ty(ty))
      return getSInt16Ty();
    if (isInt16Ty(ty))
      return getSInt32Ty();
    if (isInt32Ty(ty))
      return getSInt64Ty();
  } else {
    if (isInt8Ty(ty))
      return getUInt16Ty();
    if (isInt16Ty(ty))
      return getUInt32Ty();
    if (isInt32Ty(ty))
      return getUInt64Ty();
  }
  llvm_unreachable("NYI");
}

// Build a vector type with the same lane count as `vt` but whose element
// type is the double-width integer type of `vt`'s element (see
// getExtendedIntTy). `vt` must be a vector of cir::IntType.
mlir::cir::VectorType getExtendedElementVectorType(mlir::cir::VectorType vt,
                                                   bool isSigned = false) {
  auto eltTy = mlir::dyn_cast_or_null<mlir::cir::IntType>(vt.getEltType());
  assert(eltTy && "expected int vector");
  auto extendedEltTy = getExtendedIntTy(eltTy, isSigned);
  return mlir::cir::VectorType::get(getContext(), extendedEltTy,
                                    vt.getSize());
}

mlir::cir::LongDoubleType
getLongDoubleTy(const llvm::fltSemantics &format) const {
if (&format == &llvm::APFloat::IEEEdouble())
Expand Down
23 changes: 15 additions & 8 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1899,9 +1899,11 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
return nullptr;
}

static mlir::Type GetNeonType(CIRGenFunction *CGF, NeonTypeFlags TypeFlags,
bool HasLegalHalfType = true, bool V1Ty = false,
bool AllowBFloatArgsAndRet = true) {
static mlir::cir::VectorType GetNeonType(CIRGenFunction *CGF,
NeonTypeFlags TypeFlags,
bool HasLegalHalfType = true,
bool V1Ty = false,
bool AllowBFloatArgsAndRet = true) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
case NeonTypeFlags::Int8:
Expand Down Expand Up @@ -2002,7 +2004,7 @@ static mlir::Value buildAArch64TblBuiltinExpr(CIRGenFunction &CGF,

// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type = Result->getZExtValue();
auto Ty = GetNeonType(&CGF, Type);
mlir::cir::VectorType Ty = GetNeonType(&CGF, Type);
if (!Ty)
return nullptr;

Expand Down Expand Up @@ -2215,8 +2217,8 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
const bool allowBFloatArgsAndRet =
getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

mlir::Type vTy = GetNeonType(this, neonType, hasLegalHalfType, false,
allowBFloatArgsAndRet);
mlir::cir::VectorType vTy = GetNeonType(this, neonType, hasLegalHalfType,
false, allowBFloatArgsAndRet);
if (!vTy)
return nullptr;

Expand Down Expand Up @@ -2947,7 +2949,7 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
}
}

auto Ty = GetNeonType(this, Type);
mlir::cir::VectorType Ty = GetNeonType(this, Type);
if (!Ty)
return nullptr;

Expand Down Expand Up @@ -3041,7 +3043,12 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
case NEON::BI__builtin_neon_vqshrun_n_v:
llvm_unreachable("NYI");
case NEON::BI__builtin_neon_vqrshrun_n_v:
llvm_unreachable("NYI");
// The prototype of builtin_neon_vqrshrun_n can be found at
// https://developer.arm.com/architectures/instruction-sets/intrinsics/
return buildNeonCall(
BuiltinID, *this,
{builder.getExtendedElementVectorType(Ty, true), SInt32Ty}, Ops,
"llvm.aarch64.neon.sqrshrun", Ty, getLoc(E->getExprLoc()));
case NEON::BI__builtin_neon_vqshrn_n_v:
llvm_unreachable("NYI");
case NEON::BI__builtin_neon_vrshrn_n_v:
Expand Down
69 changes: 69 additions & 0 deletions clang/test/CIR/CodeGen/aarch64-neon-simd-shift.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN: -ffreestanding -emit-cir -target-feature +neon %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN: -ffreestanding -emit-llvm -target-feature +neon %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>

// Signed saturating rounded shift right unsigned narrow: each i16 lane is
// rounded-shifted right by 3 and saturated into a u8 lane.
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
return vqrshrun_n_s16(a, 3);
}

// CIR-LABEL: test_vqrshrun_n_s16
// CIR: [[INTRN_ARG1:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: [[INTRN_ARG0:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s16i x 8>
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrshrun" [[INTRN_ARG0]], [[INTRN_ARG1]] :
// CIR-SAME: (!cir.vector<!s16i x 8>, !s32i) -> !cir.vector<!u8i x 8>

// LLVM: {{.*}}test_vqrshrun_n_s16(<8 x i16>{{.*}} [[A:%.*]])
// LLVM: store <8 x i16> [[A]], ptr [[A_ADDR:%.*]], align 16
// LLVM: [[A_VAL:%.*]] = load <8 x i16>, ptr [[A_ADDR]], align 16
// LLVM: store <8 x i16> [[A_VAL]], ptr [[S0:%.*]], align 16
// LLVM: [[S0_VAL:%.*]] = load <8 x i16>, ptr [[S0]], align 16
// LLVM: [[S0_VAL_CAST:%.*]] = bitcast <8 x i16> [[S0_VAL]] to <16 x i8>
// LLVM: [[INTRN_ARG:%.*]] = bitcast <16 x i8> [[S0_VAL_CAST]] to <8 x i16>
// LLVM: {{%.*}} = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[INTRN_ARG]], i32 3)
// LLVM: ret <8 x i8> {{%.*}}

// Signed saturating rounded shift right unsigned narrow: each i32 lane is
// rounded-shifted right by 7 and saturated into a u16 lane.
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
return vqrshrun_n_s32(a, 7);
}

// CIR-LABEL: test_vqrshrun_n_s32
// CIR: [[INTRN_ARG1:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: [[INTRN_ARG0:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s32i x 4>
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrshrun" [[INTRN_ARG0]], [[INTRN_ARG1]] :
// CIR-SAME: (!cir.vector<!s32i x 4>, !s32i) -> !cir.vector<!u16i x 4>

// LLVM: {{.*}}test_vqrshrun_n_s32(<4 x i32>{{.*}} [[A:%.*]])
// LLVM: store <4 x i32> [[A]], ptr [[A_ADDR:%.*]], align 16
// LLVM: [[A_VAL:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// LLVM: store <4 x i32> [[A_VAL]], ptr [[S0:%.*]], align 16
// LLVM: [[S0_VAL:%.*]] = load <4 x i32>, ptr [[S0]], align 16
// LLVM: [[S0_VAL_CAST:%.*]] = bitcast <4 x i32> [[S0_VAL]] to <16 x i8>
// LLVM: [[INTRN_ARG:%.*]] = bitcast <16 x i8> [[S0_VAL_CAST]] to <4 x i32>
// LLVM: {{%.*}} = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[INTRN_ARG]], i32 7)
// LLVM: ret <4 x i16> {{%.*}}

// Signed saturating rounded shift right unsigned narrow: each i64 lane is
// rounded-shifted right by 15 and saturated into a u32 lane.
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
return vqrshrun_n_s64(a, 15);
}

// CIR-LABEL: test_vqrshrun_n_s64
// CIR: [[INTRN_ARG1:%.*]] = cir.const #cir.int<15> : !s32i
// CIR: [[INTRN_ARG0:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqrshrun" [[INTRN_ARG0]], [[INTRN_ARG1]] :
// CIR-SAME: (!cir.vector<!s64i x 2>, !s32i) -> !cir.vector<!u32i x 2>

// LLVM: {{.*}}test_vqrshrun_n_s64(<2 x i64>{{.*}} [[A:%.*]])
// LLVM: store <2 x i64> [[A]], ptr [[A_ADDR:%.*]], align 16
// LLVM: [[A_VAL:%.*]] = load <2 x i64>, ptr [[A_ADDR]], align 16
// LLVM: store <2 x i64> [[A_VAL]], ptr [[S0:%.*]], align 16
// LLVM: [[S0_VAL:%.*]] = load <2 x i64>, ptr [[S0]], align 16
// LLVM: [[S0_VAL_CAST:%.*]] = bitcast <2 x i64> [[S0_VAL]] to <16 x i8>
// LLVM: [[INTRN_ARG:%.*]] = bitcast <16 x i8> [[S0_VAL_CAST]] to <2 x i64>
// LLVM: {{%.*}} = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[INTRN_ARG]], i32 15)
// LLVM: ret <2 x i32> {{%.*}}