Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 2 additions & 28 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,32 +69,6 @@ function(add_gtest test_name test_src)
add_dependencies(google-tests ${test_name})
endfunction()

add_subdirectory(unit_tests)

add_gtest(mpiUtilsTest runtime/mpiUtilsTest.cpp)

add_gtest(gptDecoderTest runtime/gptDecoderTest.cpp)
add_gtest(gptDecoderBatchedTest runtime/gptDecoderBatchedTest.cpp)
add_gtest(medusaModuleTest runtime/medusaModuleTest.cpp)

add_gtest(moeLoadBalancerTest runtime/moeLoadBalancerTest.cpp)

add_gtest(sanitizerTest runtime/sanitizerTest.cpp)

add_gtest(eaglePackDataTest kernels/eaglePackDataTest.cpp)

add_gtest(medusaDecodeLayerTest layers/medusaDecodeLayerTest.cpp)

add_gtest(moeLoadBalanceKernelTest kernels/moeLoadBalanceKernelTest.cpp)

add_gtest(eagleLayerTest layers/eagleLayerTest.cpp)

add_subdirectory(utils)

if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/batch_manager)
add_subdirectory(batch_manager)
endif()

if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/executor)
add_subdirectory(executor)
endif()
add_subdirectory(unit_tests)
add_subdirectory(e2e_tests)
13 changes: 13 additions & 0 deletions cpp/tests/e2e_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: NVIDIA TensorRT
# Source Code License Agreement
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this material and related documentation without an express
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.

# Pull in every end-to-end test suite that lives directly below this directory.
# The suites are added in the order listed.
foreach(e2e_suite IN ITEMS batch_manager executor)
  add_subdirectory(${e2e_suite})
endforeach()
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,10 @@
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.

add_gtest(cacheTransceiverTest cacheTransceiverTest.cpp)

# guidedDecoderTest requires model tokenizer info, so it's easier to run it with
# e2e tests instead of unit tests.
add_gtest(guidedDecoderTest guidedDecoderTest.cpp)
add_gtest(trtEncoderModelTest trtEncoderModelTest.cpp)
add_gtest(trtGptModelTest trtGptModelTest.cpp)
add_gtest(trtGptModelRealDecoderTest trtGptModelRealDecoderTest.cpp)
target_link_libraries(trtGptModelRealDecoderTest PRIVATE testingUtils)

add_gtest(peftCacheManagerTest peftCacheManagerTest.cpp)
add_gtest(trtEncoderModelTest trtEncoderModelTest.cpp)
add_gtest(guidedDecoderTest guidedDecoderTest.cpp)
add_gtest(blockKeyTest blockKeyTest.cpp)
Original file line number Diff line number Diff line change
Expand Up @@ -1405,7 +1405,7 @@ INSTANTIATE_TEST_SUITE_P(LlamaCon2TP1Gen1TP2PP2DisaaggOrchestrator, DisaggOrches
),
generateTestNameDisaggParams);

INSTANTIATE_TEST_SUITE_P(LlamaCon2TP2Gen2TP1DisaaggSpawnOrchestrator, DisaggOrchestratorParamsTest,
INSTANTIATE_TEST_SUITE_P(LlamaCon2TP2Gen2TP1DisaggSpawnOrchestrator, DisaggOrchestratorParamsTest,
testing::Combine( //
testing::Values(1), // processNum
testing::Values(
Expand All @@ -1418,7 +1418,7 @@ INSTANTIATE_TEST_SUITE_P(LlamaCon2TP2Gen2TP1DisaaggSpawnOrchestrator, DisaggOrch
),
generateTestNameDisaggParams);

INSTANTIATE_TEST_SUITE_P(LlamaCon2TP1Gen2PP2DisaaggSpawnOrchestrator, DisaggOrchestratorParamsTest,
INSTANTIATE_TEST_SUITE_P(LlamaCon2TP1Gen2PP2DisaggSpawnOrchestrator, DisaggOrchestratorParamsTest,
testing::Combine( //
testing::Values(1), // processNum
testing::Values(
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/unit_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ endif()

add_subdirectory(common)
add_subdirectory(kernels)
add_subdirectory(multi_gpu)
add_subdirectory(layers)
add_subdirectory(runtime)
add_subdirectory(thop)
Expand Down
4 changes: 3 additions & 1 deletion cpp/tests/unit_tests/batch_manager/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.

add_gtest(blockKeyTest blockKeyTest.cpp)
add_gtest(cacheTransBufferTest cacheTransBufferTest.cpp)
add_gtest(capacitySchedulerTest capacitySchedulerTest.cpp)
add_gtest(contextProgressTest contextProgressTest.cu)
add_gtest(evictionPolicyTest evictionPolicyTest.cpp)
add_gtest(kvCacheManagerTest kvCacheManagerTest.cpp)
add_gtest(kvCacheUtilsTest kvCacheUtilsTest.cpp)
add_gtest(llmRequestTest llmRequestTest.cpp)
add_gtest(microBatchSchedulerTest microBatchSchedulerTest.cpp)
add_gtest(peftCacheManagerTest peftCacheManagerTest.cpp)
add_gtest(staticThreadPoolTest staticThreadPoolTest.cpp)
add_gtest(cacheTransBufferTest cacheTransBufferTest.cpp)
17 changes: 4 additions & 13 deletions cpp/tests/unit_tests/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,6 @@ add_gtest(mlaChunkedPrefillTest mlaChunkedPrefillTest.cu)

add_gtest(fusedMoeCommKernelTest fusedMoeCommKernelTest.cpp)

if(NOT ENABLE_MULTI_DEVICE EQUAL 0)
add_gtest(allReduceKernelTest allReduce/allReduceKernelTest.cu)
add_gtest(allReduceFusionTest allReduce/allReduceFusionTest.cu)
add_gtest(gemmAllReduceTest allReduce/gemmAllReduceTest.cu)
if(USING_OSS_CUTLASS_ALLREDUCE_GEMM)
target_link_libraries(gemmAllReduceTest PRIVATE ar_gemm_src)
target_compile_definitions(gemmAllReduceTest
PRIVATE USING_OSS_CUTLASS_ALLREDUCE_GEMM)
endif()
endif()

add_gtest(
gemmSwigluRunnerTest
fused_gated_gemm/gemmSwigluRunnerTest.cu
Expand Down Expand Up @@ -88,11 +77,13 @@ set(SAMPLING_KERNEL_TEST_SRC
sampling/samplingTest.cpp sampling/samplingTopKTest.cpp
sampling/samplingTopPTest.cpp sampling/samplingAirTopPTest.cpp
sampling/samplingPenaltyTest.cpp sampling/samplingUtilsTest.cu)

add_gtest(samplingKernelsTest "${SAMPLING_KERNEL_TEST_SRC}")

set(ROUTING_KERNEL_TEST_SRC
routing/routingTest.cpp routing/routingLlama4Test.cpp
routing/routingRenormalizeTest.cpp routing/routingDeepSeekTest.cpp)

add_gtest(routingKernelsTest "${ROUTING_KERNEL_TEST_SRC}")

add_gtest(moeLoadBalanceKernelTest moeLoadBalanceKernelTest.cpp)

add_gtest(eaglePackDataTest eaglePackDataTest.cpp)
2 changes: 2 additions & 0 deletions cpp/tests/unit_tests/layers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,7 @@ set(LOOKAHEAD_DECODING_TEST_SRC randomLlm.cpp lookaheadDecodingLayerTest.cpp)
add_gtest(lookaheadDecodingLayerTest "${LOOKAHEAD_DECODING_TEST_SRC}")

add_gtest(dynamicDecodeLayerTest dynamicDecodeLayerTest.cpp)
add_gtest(eagleLayerTest eagleLayerTest.cpp)
add_gtest(explicitDraftTokensLayerTest explicitDraftTokensLayerTest.cpp)
add_gtest(layerUtilsTest layerUtilsTest.cpp)
add_gtest(medusaDecodeLayerTest medusaDecodeLayerTest.cpp)
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "tests/layers/eagleLayerTest.h"
#include "eagleLayerTest.h"
#include "tensorrt_llm/common/memoryUtils.h"
#include "tensorrt_llm/kernels/decodingCommon.h"
#include "tensorrt_llm/kernels/speculativeDecoding/eagleDecodingKernels.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "tests/layers/medusaDecodeLayerTest.h"
#include "medusaDecodeLayerTest.h"
#include "tensorrt_llm/kernels/decodingCommon.h"
#include "tensorrt_llm/runtime/medusaModule.h"
#include "tensorrt_llm/runtime/runtimeKernels.h"
Expand Down
16 changes: 16 additions & 0 deletions cpp/tests/unit_tests/multi_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: NVIDIA TensorRT
# Source Code License Agreement
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this material and related documentation without an express
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.

# Kernel-level multi-GPU tests are declared in their own subdirectory.
add_subdirectory(kernels)

# Each test below is built from the single .cpp file that shares its name.
# NOTE(review): unlike the tests in kernels/, these are not gated on
# ENABLE_MULTI_DEVICE -- confirm they still build in single-device
# configurations.
foreach(multi_gpu_test IN ITEMS cacheTransceiverTest mpiUtilsTest
                                userBufferTest)
  add_gtest(${multi_gpu_test} ${multi_gpu_test}.cpp)
endforeach()
21 changes: 21 additions & 0 deletions cpp/tests/unit_tests/multi_gpu/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: NVIDIA TensorRT
# Source Code License Agreement
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this material and related documentation without an express
# license agreement from NVIDIA CORPORATION or its affiliates is strictly
# prohibited.

# All-reduce kernel tests. They are registered only when ENABLE_MULTI_DEVICE
# evaluates to true.
#
# The guard uses CMake's boolean evaluation instead of `EQUAL 0`: `EQUAL` is a
# numeric comparison, so boolean spellings such as OFF/FALSE/NO never compared
# equal to 0 and the tests were still added. Leaving ENABLE_MULTI_DEVICE
# undefined keeps the previous behaviour (the tests are added).
if(NOT DEFINED ENABLE_MULTI_DEVICE OR ENABLE_MULTI_DEVICE)
  add_gtest(allReduceKernelTest allReduce/allReduceKernelTest.cu)
  add_gtest(allReduceFusionTest allReduce/allReduceFusionTest.cu)
  add_gtest(gemmAllReduceTest allReduce/gemmAllReduceTest.cu)

  # When the OSS CUTLASS all-reduce GEMM is in use, link ar_gemm_src into the
  # test and expose the same macro to the test sources.
  if(USING_OSS_CUTLASS_ALLREDUCE_GEMM)
    target_link_libraries(gemmAllReduceTest PRIVATE ar_gemm_src)
    target_compile_definitions(gemmAllReduceTest
                               PRIVATE USING_OSS_CUTLASS_ALLREDUCE_GEMM)
  endif()
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -501,11 +501,8 @@ TEST(Kernel_AllReduceFusion, AllReduceAccuracyRandomTokenNum)
auto& comm = mpi::MpiComm::world();
auto world_size = comm.getSize();
auto rank = comm.getRank();
if (world_size % 2)
{
TLLM_LOG_WARNING("world size is not a multiple of 2, return");
return;
}
ASSERT_EQ(world_size % 2, 0) << "Requires even world size (got " << world_size << ")";

int iter = 100;
std::vector<int> candidate_hidden_dim{1024, 2048, 4096, 7168, 8192};
int min_token_num = 1;
Expand Down Expand Up @@ -537,11 +534,8 @@ TEST(Kernel_AllReduceFusion, AllReduceAccuracyFixedTokenNum)
auto& comm = mpi::MpiComm::world();
auto world_size = comm.getSize();
auto rank = comm.getRank();
if (world_size % 2)
{
TLLM_LOG_WARNING("world size is not a multiple of 2, return");
return;
}
ASSERT_EQ(world_size % 2, 0) << "Requires even world size (got " << world_size << ")";

int iter = 10;
std::vector<int> candidate_hidden_dim{1024, 2048, 4096, 7168, 8192};
int min_token_num = 1;
Expand Down Expand Up @@ -603,11 +597,8 @@ TEST(Kernel_AllReduceFusion, AllReduceFusionAccuracyDifferentHiddenDim)
auto& comm = mpi::MpiComm::world();
auto world_size = comm.getSize();
auto rank = comm.getRank();
if (world_size % 2)
{
TLLM_LOG_WARNING("world size is not a multiple of 2, return");
return;
}
ASSERT_EQ(world_size % 2, 0) << "Requires even world size (got " << world_size << ")";

int const arch = tensorrt_llm::common::getSMVersion();
if (arch >= 100)
{
Expand Down Expand Up @@ -647,11 +638,8 @@ TEST(Kernel_AllReduceFusion, AllReduceFusionAccuracyDifferentDType)
auto& comm = mpi::MpiComm::world();
auto world_size = comm.getSize();
auto rank = comm.getRank();
if (world_size % 2)
{
TLLM_LOG_WARNING("world size is not a multiple of 2, return");
return;
}
ASSERT_EQ(world_size % 2, 0) << "Requires even world size (got " << world_size << ")";

std::vector<int> candidate_hidden_dim{1024, 2048, 4096, 7168, 8192};
int min_token_num = 1;
int max_token_num = 2048;
Expand Down Expand Up @@ -683,53 +671,52 @@ TEST(Kernel_AllReduceFusion, AllReduceFusionAccuracyDifferentDType)
TEST(Kernel_AllReduceFusion, Perf)
{
int const arch = tensorrt_llm::common::getSMVersion();
if (arch >= 100)
if (arch < 100)
{
GTEST_SKIP() << "Skipping test for SM < 100";
}

using Runner = TestRunner<half, ar_fusion::AllReduceFusionPattern::kARResidualRMSNormFP4Quant>;
auto& comm = mpi::MpiComm::world();
auto world_size = comm.getSize();
auto rank = comm.getRank();
ASSERT_EQ(world_size % 2, 0) << "Requires even world size (got " << world_size << ")";

int warmup = 100, iter = 300;
int hidden_dim = 7168;
std::vector<int> candidate_token_num{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048};
int max_token_num = 2048;
Runner runner(max_token_num, hidden_dim);
for (auto token_num : candidate_token_num)
{
using Runner = TestRunner<half, ar_fusion::AllReduceFusionPattern::kARResidualRMSNormFP4Quant>;
auto& comm = mpi::MpiComm::world();
auto world_size = comm.getSize();
auto rank = comm.getRank();
if (world_size % 2)
auto latency = runner.benchmark(&Runner::run_kernel, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_WARNING("world size is not a multiple of 2, return");
return;
TLLM_LOG_INFO(
"token_num %-4d, hidden_dim %-4d, fusion kernel latency %4.4fus", token_num, hidden_dim, latency);
}
int warmup = 100, iter = 300;
int hidden_dim = 7168;
std::vector<int> candidate_token_num{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048};
int max_token_num = 2048;
Runner runner(max_token_num, hidden_dim);
for (auto token_num : candidate_token_num)
auto nccl_latency = runner.benchmark(&Runner::run_nccl_allreduce, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
auto latency = runner.benchmark(&Runner::run_kernel, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO(
"token_num %-4d, hidden_dim %-4d, fusion kernel latency %4.4fus", token_num, hidden_dim, latency);
}
auto nccl_latency = runner.benchmark(&Runner::run_nccl_allreduce, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("nccl allreduce latency %4.4fus", nccl_latency);
}
auto residual_latency = runner.benchmark(&Runner::run_residual_add, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("residual add latency %4.4fus", residual_latency);
}
auto rms_latency = runner.benchmark(&Runner::run_rms_norm, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("rms norm latency %4.4fus", rms_latency);
}
auto quant_latency = runner.benchmark(&Runner::run_fp4_quant, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("fp4 quant latency %4.4fus", quant_latency);
auto tot_latency = nccl_latency + residual_latency + rms_latency + quant_latency;
TLLM_LOG_INFO("fusion kernel latency %4.4fus, nccl + ops latency %4.4fus, total speedup %2.4fx",
latency, tot_latency, tot_latency / latency);
}
TLLM_LOG_INFO("nccl allreduce latency %4.4fus", nccl_latency);
}
auto residual_latency = runner.benchmark(&Runner::run_residual_add, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("residual add latency %4.4fus", residual_latency);
}
auto rms_latency = runner.benchmark(&Runner::run_rms_norm, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("rms norm latency %4.4fus", rms_latency);
}
auto quant_latency = runner.benchmark(&Runner::run_fp4_quant, warmup, iter, token_num, hidden_dim);
if (rank == 0)
{
TLLM_LOG_INFO("fp4 quant latency %4.4fus", quant_latency);
auto tot_latency = nccl_latency + residual_latency + rms_latency + quant_latency;
TLLM_LOG_INFO("fusion kernel latency %4.4fus, nccl + ops latency %4.4fus, total speedup %2.4fx", latency,
tot_latency, tot_latency / latency);
}
}
}
Loading