fix: get the branch running after cleanup, add HardSwish
Chamberlain0w0 committed Jan 31, 2024
commit b82c861c3cf08a9eb72e93f4c77ac72f237c5bb3
18 changes: 11 additions & 7 deletions src/04kernel/src/kernels/gather/cnnl_kernel.cc
@@ -16,13 +16,16 @@ namespace refactor::kernel {
 #ifndef USE_BANG
         return nullptr;
 #endif
 
+        auto indicesDim = std::vector<int>(index.shape.begin(), index.shape.end());
+        if (indicesDim.size() == 0) {
+            indicesDim.push_back(1);
+        }
         return std::make_unique<K>(decltype(info){
             input.dataType,
-            index.dataType,
-            axis,
+            DataType::I32,
+            axis ? axis : 0,
             std::vector<int>(input.shape.begin(), input.shape.end()),
-            std::vector<int>(index.shape.begin(), index.shape.end()),
+            std::move(indicesDim),
             std::vector<int>(output.shape.begin(), output.shape.end()),
         });
     }
@@ -60,8 +63,9 @@ namespace refactor::kernel {
         CNNL_ASSERT(cnnlSetTensorDescriptor(
             d->inDesc, CNNL_LAYOUT_ARRAY, cnnlDataTypeConvert(info.dataType),
             info.inDim.size(), info.inDim.data()));
+        // cnnlGatherV2 does not support int64 indices
         CNNL_ASSERT(cnnlSetTensorDescriptor(
-            d->indexDesc, CNNL_LAYOUT_ARRAY, cnnlDataTypeConvert(info.indexDataType),
+            d->indexDesc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_INT32,
             info.indexDim.size(), info.indexDim.data()));
         CNNL_ASSERT(cnnlSetTensorDescriptor(
             d->outDesc, CNNL_LAYOUT_ARRAY, cnnlDataTypeConvert(info.dataType),
@@ -71,15 +75,15 @@ namespace refactor::kernel {
 
         res.fetchOrStore<CnnlContext>();
         auto routine = [d = std::move(d),
-                       shape = std::vector<int>(info.inDim.begin(), info.inDim.end()),
+                        shape = std::vector<int>(info.inDim.begin(), info.inDim.end()),
                         workspaceSize,
                         dim = info.axis](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
             res.fetchOrStore<CnnlContext>()->copyFromCPU(workspace, shape.data(), workspaceSize);
             CNNL_ASSERT(cnnlGatherV2(res.fetchOrStore<CnnlContext>()->handle, dim,
                                      d->inDesc, inputs[0], reinterpret_cast<const int *>(workspace),
                                      d->indexDesc, reinterpret_cast<const int *>(inputs[1]),
                                      d->outDesc, outputs[0]));
-       };
+        };
 
         return {std::move(routine), workspaceSize};
     }
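Note on the change above: cnnlGatherV2 only accepts int32 indices (hence the hard-coded CNNL_DTYPE_INT32 and DataType::I32), and the descriptor API needs at least one dimension, so a scalar (rank-0) index tensor is padded to shape {1}. A minimal host-side sketch of the narrowing an int64 index tensor would need before it reaches this kernel; narrowIndices is a hypothetical helper, not part of this commit, and it assumes every index value fits in int32:

    #include <cstdint>
    #include <vector>

    // Narrow int64 (ONNX-style) indices to the int32 layout cnnlGatherV2 expects.
    // Assumes all values fit in int32; real code should validate that.
    std::vector<int32_t> narrowIndices(std::vector<int64_t> const &idx) {
        std::vector<int32_t> out;
        out.reserve(idx.size());
        for (auto i : idx) {
            out.push_back(static_cast<int32_t>(i));
        }
        return out;
    }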
22 changes: 9 additions & 13 deletions src/04kernel/src/kernels/simple_binary/binary_cnnl.cc
@@ -26,10 +26,7 @@ namespace refactor::kernel {
             // !a.dataType.isFloat() ||
             !ARTHIMETIC.contains(op) ||
             // At least one of a,b should have the same shape as c
-            (a.shape != c.shape && b.shape != c.shape)
-            // Sub only supports brocasting b
-            // (a.shape != c.shape && op == Op::Sub)
-        ) {
+            (a.shape != c.shape && b.shape != c.shape)) {
             return nullptr;
         }
 
@@ -124,9 +121,9 @@ namespace refactor::kernel {
         auto handle = res.fetchOrStore<CnnlContext>()->handle;
         size_t workspaceSize;
         CNNL_ASSERT(cnnlGetBinaryWorkspaceSize(handle, d->aDesc,
-                                              d->bDesc, d->cDesc,
-                                              &workspaceSize));
+                                               d->bDesc, d->cDesc,
+                                               &workspaceSize));
 
 
         res.fetchOrStore<CnnlContext>();
         auto routine = [d = std::move(d),
@@ -147,11 +144,11 @@ namespace refactor::kernel {
                 beta = d->f32
                            ? factor<fp32_t>(0)
                            : factor<fp64_t>(0);
-               CNNL_ASSERT(cnnlOpTensor(handle, d->opDesc,
-                                        &alphaA, d->aDesc, a,
-                                        &alphaB, d->bDesc, b,
-                                        workspace, workspaceSize,
-                                        &beta, d->cDesc, c));
+                CNNL_ASSERT(cnnlOpTensor(handle, d->opDesc,
+                                         &alphaA, d->aDesc, a,
+                                         &alphaB, d->bDesc, b,
+                                         workspace, workspaceSize,
+                                         &beta, d->cDesc, c));
             } else if (op == SimpleBinaryType::Div) {
                 CNNL_ASSERT(cnnlDiv_v2(handle,
                                        CNNL_COMPUTATION_HIGH_PRECISION,
@@ -179,7 +176,6 @@ namespace refactor::kernel {
                                        d->cDesc, c,
                                        workspace, workspaceSize));
             }
-
         };
 
         return {std::move(routine), workspaceSize};
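Note on the alphaA/alphaB/beta factors above: cnnlOpTensor combines its operands roughly as c = op(alphaA * a, alphaB * b) + beta * c, which is how Sub can reuse the Add path with alphaB = -1. A scalar model of that contract, as a sketch of the semantics rather than the library call:

    // Scalar model of the OpTensor Add path:
    // c = alphaA * a + alphaB * b + beta * c_prev.
    // Sub(a, b) is then alphaA = 1, alphaB = -1, beta = 0.
    float opTensorAddModel(float a, float b, float cPrev,
                           float alphaA, float alphaB, float beta) {
        return alphaA * a + alphaB * b + beta * cPrev;
    }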
21 changes: 14 additions & 7 deletions src/04kernel/src/kernels/simple_unary/cnnl_activation_kernel.cc
@@ -17,7 +17,7 @@ namespace refactor::kernel {
         : Kernel(), type(type_), dataType(dataType_), size(size_) {}
 
     auto K::build(Op op, Tensor const &a) noexcept -> KernelBox {
-        static const std::unordered_set<Op> ARTHIMETIC{Op::Sigmoid, Op::Relu, Op::Tanh};
+        static const std::unordered_set<Op> ARTHIMETIC{Op::Sigmoid, Op::Relu, Op::Tanh, Op::HardSwish};
 
 #ifndef USE_BANG
         return nullptr;
@@ -64,20 +64,27 @@ namespace refactor::kernel {
         auto d = std::make_shared<Descriptors>();
 
         // clang-format off
-        auto mode = type == Ty::Relu    ? CNNL_ACTIVATION_RELU
-                  : type == Ty::Sigmoid ? CNNL_ACTIVATION_SIGMOID
-                  : type == Ty::Tanh    ? CNNL_ACTIVATION_TANH
+        auto mode = type == Ty::Relu      ? CNNL_ACTIVATION_RELU
+                  : type == Ty::Sigmoid   ? CNNL_ACTIVATION_SIGMOID
+                  : type == Ty::Tanh      ? CNNL_ACTIVATION_TANH
+                  : type == Ty::HardSwish ? CNNL_ACTIVATION_HARDSWISH
                   : UNREACHABLEX(cnnlActivationMode_t, "");
+        float coef = 0.0;
+        float slicedDim = 0.0;
+        float gamma = 0.0;
+        float scale = 0.0;
         // clang-format on
 
         setCnnlTensor(d->tensor, dataType, slice(&size, 1));
-        CNNL_ASSERT(cnnlSetActivationDescriptor_v2(d->activation, mode, CNNL_ACTIVATION_HIGH_PRECISION,
-                                                   CNNL_NOT_PROPAGATE_NAN, 0.0));
+        CNNL_ASSERT(cnnlSetActivationDescriptor_v5(d->activation, mode,
+                                                   CNNL_ACTIVATION_HIGH_PRECISION,
+                                                   CNNL_NOT_PROPAGATE_NAN, coef,
+                                                   slicedDim, gamma, scale, true));
 
         res.fetchOrStore<CnnlContext>();
         return [d = std::move(d)]//
             (Resources & res, void *, void const *const *inputs, void *const *outputs) {
-            float alpha = 1, beta = 0;
+            float alpha = 1.f, beta = 0.f;
             CNNL_ASSERT(cnnlActivationForward(
                 res.fetchOrStore<CnnlContext>()->handle,
                 d->activation,
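HardSwish computes x * max(0, min(1, x/6 + 1/2)); CNNL exposes it as one more activation mode rather than a standalone op, which is why the whole change is an extra ternary branch plus a switch to the _v5 descriptor setter that takes the additional coef/slicedDim/gamma/scale parameters. A trimmed standalone sketch of the descriptor setup, mirroring the kernel's call with the same zeroed parameters; makeHardSwishDescriptor is hypothetical and error handling is elided:

    #include <cnnl.h>

    // Build an activation descriptor configured for HardSwish,
    // passing zeros for the extra parameters exactly as the kernel does.
    cnnlActivationDescriptor_t makeHardSwishDescriptor() {
        cnnlActivationDescriptor_t activation;
        cnnlCreateActivationDescriptor(&activation);
        float coef = 0.f, slicedDim = 0.f, gamma = 0.f, scale = 0.f;
        cnnlSetActivationDescriptor_v5(activation, CNNL_ACTIVATION_HARDSWISH,
                                       CNNL_ACTIVATION_HIGH_PRECISION,
                                       CNNL_NOT_PROPAGATE_NAN, coef,
                                       slicedDim, gamma, scale, true);
        return activation;
    }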
1 change: 1 addition & 0 deletions src/04kernel/test/kernels/simple_unary/test_cnnl.cpp
@@ -60,6 +60,7 @@ TEST(kernel, ActivationCnnl) {
     testOp(SimpleUnaryType::Relu);
     testOp(SimpleUnaryType::Sigmoid);
     testOp(SimpleUnaryType::Tanh);
+    testOp(SimpleUnaryType::HardSwish);
 }
 
 
4 changes: 2 additions & 2 deletions src/04kernel/test/kernels/transpose/test_cnnl.cpp
@@ -14,8 +14,8 @@ using namespace hardware;
 TEST(kernel, TransposeCnnl) {
     // build routine
     auto dataTensor = Tensor::share(DataType::F32, Shape{1, 3, 2, 5});
-    auto info = TransposeInfo(dataTensor->shape, Permutation{2, 3, 0, 1});
-    auto kCpu = TransposeCpu::build(dataTensor->dataType, info);
+    auto info = TransposeInfo(dataTensor->dataType, dataTensor->shape, Permutation{2, 3, 0, 1});
+    auto kCpu = TransposeCpu::build(info);
     auto kernel = TransposeCnnl::build(dataTensor->dataType, dataTensor->shape, Permutation{2, 3, 0, 1});
     ASSERT_TRUE(kCpu && kernel);
     auto res = runtime::Resources();