fix: resolve some warnings and move the sync operations out of the operator kernels
Chamberlain0w0 authored and YdrMaster committed Jan 31, 2024
commit 2258c1ee2d09cfe0add168d5944e6d3ec349c634
5 changes: 2 additions & 3 deletions src/04kernel/src/kernels/batch_normalization/cnnl_kernel.cc
@@ -128,8 +128,8 @@ namespace refactor::kernel {
auto y = outputs[0];

void *xTrans = workspace;
void *yTrans = xTrans + xTransSize;
void *cursor = yTrans + xTransSize;
void *yTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
void *cursor = reinterpret_cast<uint8_t *>(yTrans) + xTransSize;

// transpose NCHW input to NHWC
CNNL_ASSERT(cnnlTranspose_v2(handle, d->NCHW2NHWC, d->inDesc, x,
@@ -147,7 +147,6 @@ namespace refactor::kernel {
CNNL_ASSERT(cnnlTranspose_v2(handle, d->NHWC2NCHW, d->inDescTrans, yTrans,
d->inDesc, y, cursor, workspaceSize));

BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), totalWorkspaceSize};
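The hunk above shows the two themes of this commit in one place: pointer arithmetic on void * (a GCC extension that warns under -Wpointer-arith) is replaced by stepping through uint8_t *, and the cnrtQueueSync call at the end of the routine is dropped, leaving synchronization to the caller (see the test changes below). The same byte-offset idiom reappears in the conv and pool kernels. A minimal standalone sketch of the idiom, with made-up sizes:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    int main() {
        // Illustrative sizes only; the real kernel derives them from tensor shapes.
        std::size_t xTransSize = 256, yTransSize = 256, opWorkspaceSize = 1024;
        void *workspace = std::malloc(xTransSize + yTransSize + opWorkspaceSize);

        void *xTrans = workspace;
        // void* + n is non-standard; cast to uint8_t* so the offset is counted in bytes.
        void *yTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
        void *cursor = reinterpret_cast<uint8_t *>(yTrans) + yTransSize;
        (void) cursor;// the sub-buffers would be handed to the CNNL calls here

        std::free(workspace);
        return 0;
    }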
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/cast/cnnl_kernel.cc
@@ -65,7 +65,6 @@ namespace refactor::kernel {
return [d = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
CNNL_ASSERT(cnnlCastDataType(res.fetchOrStore<CnnlContext>()->handle,
d->inDesc, inputs[0], d->cast, d->outDesc, outputs[0]));
// BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};
}

1 change: 0 additions & 1 deletion src/04kernel/src/kernels/clip/cnnl_kernel.cc
@@ -57,7 +57,6 @@ namespace refactor::kernel {
CNNL_POINTER_MODE_DEVICE, d->t,
inputs[0], inputs[1], hasMax ? inputs[2] : nullptr,
d->t, outputs[0]));
BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};
}

4 changes: 2 additions & 2 deletions src/04kernel/src/kernels/concat/cnnl_kernel.cc
@@ -52,7 +52,7 @@ namespace refactor::kernel {
}
~Descriptors() noexcept(false) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(in));
for (auto i = 0; i < out.size(); i++) {
for (size_t i = 0; i < out.size(); i++) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(out[i]));
}
}
@@ -62,7 +62,7 @@
};
auto d = std::make_shared<Descriptors>(info.num, info.dataType != DT::F64);
setCnnlTensor(d->in, info.dataType, slice(info.inDim.data(), info.inDim.size()));
for (auto i = 0; i < info.outDims.size(); i++) {
for (size_t i = 0; i < info.outDims.size(); i++) {
setCnnlTensor(d->out[i], info.dataType, slice(info.outDims[i].data(), info.outDims[i].size()));
}

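The loop-index change here, repeated in split and slice below and in the gather test, fixes a -Wsign-compare warning: auto i = 0 deduces int, which is then compared against an unsigned size(). A tiny reproduction of the warning and the fix:

    #include <cstddef>
    #include <vector>

    int main() {
        std::vector<int> out(4);
        // for (auto i = 0; i < out.size(); i++)   // warns: comparison of int with size_t
        for (std::size_t i = 0; i < out.size(); i++) {// unsigned index, no warning
            out[i] = static_cast<int>(i);
        }
        return 0;
    }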
6 changes: 3 additions & 3 deletions src/04kernel/src/kernels/conv/cnnl_kernel.cc
@@ -209,9 +209,9 @@ namespace refactor::kernel {
// }

void *xTrans = workspace;
void *wTrans = xTrans + xTransSize;
void *yTrans = wTrans + wTransSize;
void *opWorkspace = yTrans + yTransSize;
void *wTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
void *yTrans = reinterpret_cast<uint8_t *>(wTrans) + wTransSize;
void *opWorkspace = reinterpret_cast<uint8_t *>(yTrans) + yTransSize;

// transpose NCHW input to NHWC
CNNL_ASSERT(cnnlTranspose_v2(handle, d->NCHW2NHWC, d->x, x,
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/expand/cnnl_kernel.cc
@@ -60,7 +60,6 @@ namespace refactor::kernel {
return [d = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
CNNL_ASSERT(cnnlExpand(res.fetchOrStore<CnnlContext>()->handle,
d->inDesc, inputs[0], d->outDesc, outputs[0]));
// BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};
}
#endif
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/gather/cnnl_kernel.cc
@@ -79,7 +79,6 @@ namespace refactor::kernel {
d->inDesc, inputs[0], reinterpret_cast<const int *>(workspace),
d->indexDesc, reinterpret_cast<const int *>(inputs[1]),
d->outDesc, outputs[0]));
BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), workspaceSize};
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/mat_mul/cnnl_kernel.cc
@@ -141,7 +141,6 @@ namespace refactor::kernel {
workspace, algoWorkspaceSize));
}

BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), algoWorkspaceSize};
4 changes: 2 additions & 2 deletions src/04kernel/src/kernels/pool/cnnl_kernel.cc
@@ -130,7 +130,7 @@ namespace refactor::kernel {
auto handle = res.fetchOrStore<CnnlContext>()->handle;

void *extraInputDev = workspace;
void *poolWorkSpace = workspace + extraInputSize;
void *poolWorkSpace = reinterpret_cast<uint8_t *>(workspace) + extraInputSize;

void *extraInputHost = malloc(extraInputSize);
CNNL_ASSERT(cnnlInitPoolingExtraInput(handle, d->pooling, d->x, d->y, extraInputHost));
@@ -145,7 +145,7 @@ namespace refactor::kernel {
&b, extraInputDev, d->y, outputs[0],
poolWorkSpace, workspaceSize));

BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
res.fetchOrStore<CnnlContext>()->queueSync();

free(extraInputHost);
};
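Pooling is the one kernel that keeps a synchronization, now routed through the context's queueSync() helper instead of a raw BANG_ASSERT(cnrtQueueSync(...)): the routine mallocs extraInputHost, feeds it to the queue, and frees it before returning, so it presumably has to wait for the queued work to finish consuming that buffer. A hypothetical sketch of what such a helper wraps; the repository's real CnnlContext may look different:

    #include <cnrt.h>

    // Assumed shape of the helper, not the actual CnnlContext definition.
    struct CnnlContextSketch {
        cnrtQueue_t queue;

        void queueSync() const {
            // The real code presumably asserts on the status; the sketch just drops it.
            (void) cnrtQueueSync(queue);
        }
    };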
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/simple_binary/binary_cnnl.cc
@@ -180,7 +180,6 @@ namespace refactor::kernel {
workspace, workspaceSize));
}

BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), workspaceSize};
2 changes: 1 addition & 1 deletion src/04kernel/src/kernels/slice/cnnl_kernel.cc
@@ -64,7 +64,7 @@ namespace refactor::kernel {
CNNL_ASSERT(cnnlSetTensorDescriptor(d->in, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.inDim.size(), info.inDim.data()));
CNNL_ASSERT(cnnlSetTensorDescriptor(d->out, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.outDim.size(), info.outDim.data()));
std::vector<int> begin, end, stride;
for (auto i = 0; i < info.dims.size(); i++) {
for (size_t i = 0; i < info.dims.size(); i++) {
// [begin, end), end is not inclued
begin.push_back(info.dims[i].start);
auto sign = info.dims[i].step > 0 ? 1 : -1;
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/softmax/cnnl_kernel.cc
@@ -80,7 +80,6 @@ namespace refactor::kernel {
CNNL_COMPUTATION_ULTRAHIGH_PRECISION,
&a, d->t, inputs[0],
&b, d->t, outputs[0]));
res.fetchOrStore<CnnlContext>()->queueSync();
};
}

4 changes: 2 additions & 2 deletions src/04kernel/src/kernels/split/cnnl_kernel.cc
@@ -69,7 +69,7 @@ namespace refactor::kernel {
}
~Descriptors() noexcept(false) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(in));
for (auto i = 0; i < out.size(); i++) {
for (size_t i = 0; i < out.size(); i++) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(out[i]));
}
}
@@ -81,7 +81,7 @@ namespace refactor::kernel {
// setCnnlTensor(d->in, info.dataType, slice(info.inDim.data(), info.inDim.size()));
CNNL_ASSERT(cnnlSetTensorDescriptor(d->in, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.inDim.size(), info.inDim.data()));

for (auto i = 0; i < info.outDims.size(); i++) {
for (size_t i = 0; i < info.outDims.size(); i++) {
// setCnnlTensor(d->out[i], info.dataType, slice(info.outDims[i].data(), info.outDims[i].size()));
CNNL_ASSERT(cnnlSetTensorDescriptor(d->out[i], CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.outDims[i].size(), info.outDims[i].data()));
}
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/where/cnnl_kernel.cc
@@ -102,7 +102,6 @@ namespace refactor::kernel {
d->y, inputs[2], workspace, workspaceSize,
d->ans, outputs[0]));

res.fetchOrStore<CnnlContext>()->queueSync();
};

return {std::move(routine), workspaceSize};
4 changes: 2 additions & 2 deletions src/04kernel/src/utilities/bang/cnrt_functions.cc
@@ -4,7 +4,7 @@
#include <cnrt.h>
#include <cstdio>

namespace refactor::kernel::cnnl {
namespace refactor::kernel::bang {

int currentDevice() {
int device;
@@ -22,6 +22,6 @@ namespace refactor::kernel::cnnl {
CNRT_MEM_TRANS_DIR_DEV2HOST));
}

}// namespace refactor::kernel::cnnl
}// namespace refactor::kernel::bang

#endif
4 changes: 2 additions & 2 deletions src/04kernel/src/utilities/bang/cnrt_functions.h
@@ -3,14 +3,14 @@

#include "common.h"

namespace refactor::kernel::cnnl {
namespace refactor::kernel::bang {

int currentDevice();

void sync();

void copyOut(void *dst, const void *src, size_t size);

}// namespace refactor::kernel::cnnl
}// namespace refactor::kernel::bang

#endif// KERNEL_CNRT_FUNCTIONS_H
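Moving these helpers from refactor::kernel::cnnl into refactor::kernel::bang matches the utilities/bang directory they live in and gives the tests a natural spelling, kernel::bang::sync(), for the explicit wait they now need. The header only declares sync(); a hedged guess at the kind of call it wraps (the actual implementation may differ):

    #include <cnrt.h>

    namespace refactor::kernel::bang {
        // Assumption: a device-wide wait so results can be read back safely; the real
        // sync() presumably checks the CNRT return status instead of dropping it.
        inline void syncSketch() {
            (void) cnrtSyncDevice();
        }
    }// namespace refactor::kernel::bang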
2 changes: 2 additions & 0 deletions src/04kernel/test/kernels/batch_normalization/test_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../../../src/kernels/batch_normalization/cnnl_kernel.hh"
#include "../../../src/kernels/batch_normalization/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

@@ -57,6 +58,7 @@ TEST(kernel, BatchNormalizationCnnl) {
void const *inputs[]{*mluIn, *mluScale, *mluBias, *mluMean, *mluVar};
void *outputs[]{*mluOut};
rMlu(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
// take output data
std::vector<float> result(outTensor->elementsSize());
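This test shows the pattern that every cnnl test in the rest of the commit follows: include ../src/utilities/bang/cnrt_functions.h, run the MLU routine, call kernel::bang::sync(), and only then copy the results back to the host for comparison. The explicit sync in the tests replaces the cnrtQueueSync calls that were removed from the kernel routines above.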
4 changes: 3 additions & 1 deletion src/04kernel/test/kernels/cast/test_cnnl.cpp
@@ -1,7 +1,8 @@
#ifdef USE_BANG

#include "../../../src/kernels/cast/cpu_kernel.hh"
#include "../../../src/kernels/cast/cnnl_kernel.hh"
#include "../../../src/kernels/cast/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -34,6 +35,7 @@ TEST(kernel, CastCnnl) {
void const *inputs[]{*xMlu};
void *outputs[]{*yMlu};
routine(res, nullptr, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{x_.data()};
4 changes: 3 additions & 1 deletion src/04kernel/test/kernels/clip/test_cnnl.cpp
@@ -1,7 +1,8 @@
#ifdef USE_BANG

#include "../../../src/kernels/clip/cpu_kernel.hh"
#include "../../../src/kernels/clip/cnnl_kernel.hh"
#include "../../../src/kernels/clip/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -36,6 +37,7 @@ TEST(kernel, ClipCnnl) {
void const *inputs[]{*mluMem, *mluMin, *mluMax};
void *outputs[]{*mluMem};
routine(res, nullptr, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{value.data(), &min, &max};
4 changes: 3 additions & 1 deletion src/04kernel/test/kernels/concat/test_cnnl.cpp
@@ -1,7 +1,8 @@
#ifdef USE_BANG

#include "../../../src/kernels/concat/cpu_kernel.hh"
#include "../../../src/kernels/concat/cnnl_kernel.hh"
#include "../../../src/kernels/concat/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -65,6 +66,7 @@ TEST(kernel, ConcatCnnl) {
void const *inputs[]{*mluIns[0], *mluIns[1], *mluIns[2], *mluIns[3]};
void *outputs[]{*mluOut};
routine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{cpuIns[0].data(), cpuIns[1].data(), cpuIns[2].data(), cpuIns[3].data()};
11 changes: 2 additions & 9 deletions src/04kernel/test/kernels/conv/test_cnnl.cpp
@@ -1,6 +1,7 @@
#ifdef USE_BANG

#include "../../../src/kernels/conv/cnnl_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -39,15 +40,7 @@ void testConvCnnl(int rank, const int64_t *pads, const int64_t *strides, const i
void const *inputs[]{*xMlu, *wMlu};
void *outputs[]{*yMlu};
routine(res, *workspace, inputs, outputs);

xMlu->copyToHost(xData.data(), xTensor->bytesSize());
wMlu->copyToHost(wData.data(), wTensor->bytesSize());
// fmt::println("{}", vec2str(xData));
// fmt::println("{}", vec2str(wData));

// std::vector<float> ws(workspaceSize);
// workspace->copyToHost(ws.data(), workspaceSize);
// fmt::println("{}", vec2str(ws));
kernel::bang::sync();

// take output data
std::vector<float> result(yTensor->elementsSize());
2 changes: 2 additions & 0 deletions src/04kernel/test/kernels/expand/test_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../../../src/kernels/expand/cnnl_kernel.hh"
#include "../../../src/kernels/expand/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -36,6 +37,7 @@ TEST(kernel, ExpandCnnl) {
void const *inputs[]{*mluIn};
void *outputs[]{*mluOut};
routine(res, nullptr, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{data.data()};
6 changes: 5 additions & 1 deletion src/04kernel/test/kernels/gather/test_gather_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../src/kernels/gather/cnnl_kernel.hh"
#include "../src/kernels/gather/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

@@ -39,6 +40,7 @@ TEST(kernel, GatherCnnl) {
void const *inputs[]{*aMLU, *bMLU};
void *outputs[]{*cMLU};
cnnlRoutine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{a.data(), b.data()};
@@ -81,6 +83,7 @@ TEST(kernel, GatherCnnl) {
void const *inputs[]{*aMLU, *bMLU};
void *outputs[]{*cMLU};
cnnlRoutine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{a.data(), b.data()};
@@ -110,7 +113,7 @@ TEST(kernel, GatherCnnl) {
auto cpuRoutine = cpuKernel->lower(res).routine;
// Init inputs and outputs
std::vector<float> a;
for (auto i = 0; i < data->elementsSize(); i++) {
for (size_t i = 0; i < data->elementsSize(); i++) {
a.push_back(i + 0.1f);
}
std::vector<int64_t> b(indices->elementsSize(), 0);
@@ -126,6 +129,7 @@ TEST(kernel, GatherCnnl) {
void const *inputs[]{*aMLU, *bMLU};
void *outputs[]{*cMLU};
cnnlRoutine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{a.data(), b.data()};
5 changes: 5 additions & 0 deletions src/04kernel/test/kernels/mat_mul/test_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../src/kernels/mat_mul/cnnl_kernel.hh"
#include "../src/kernels/mat_mul/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

@@ -48,6 +49,7 @@ TEST(kernel, MatMulCnnl_OnlyBias) {
void const *inputs[]{*ma, *mb, *mc};
void *outputs[]{*my};
routine(res, *workspace, inputs, outputs);
kernel::bang::sync();
// take output data
std::vector<float> result(Y->elementsSize());
my->copyToHost(result.data(), Y->bytesSize());
@@ -91,6 +93,7 @@ TEST(kernel, MatMulCnnl_Broadcast) {
void const *inputs[]{*ma, *mb, *mc};
void *outputs[]{*my};
mluRoutine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{dataA.data(), dataB.data(), dataC.data()};
@@ -135,6 +138,7 @@ TEST(kernel, MatMulCnnl_TransABNoBias) {
void const *inputs[]{*ma, *mb};
void *outputs[]{*my};
mluRoutine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{dataA.data(), dataB.data()};
@@ -189,6 +193,7 @@ TEST(kernel, MatMulCnnl_Large) {
void const *inputs[]{*ma, *mb, *mc};
void *outputs[]{*my};
mluRoutine(res, *workspace, inputs, outputs);
kernel::bang::sync();
}
{
void const *inputs[]{dataA.data(), dataB.data(), dataC.data()};