diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index e3dcf8ead7e..bdc82cece59 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -349,4 +349,7 @@ full:L20/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantize full:L20/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862) full:L40S/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized SKIP (https://nvbugs/5542862) full:L40S/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862) +accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5541494) +accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5543035) +unittest/_torch/multi_gpu_modeling/test_llama3.py::test_llama_3_3 SKIP (https://nvbugs/5536131) full:A100/accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype SKIP (https://nvbugs/5542878) diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py index 1b61d495760..e3fdea5b4ec 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py @@ -798,6 +798,7 @@ def trtllm_bench_unified_comparison( ) +@pytest.mark.skip(reason="https://nvbugs/5542907") @pytest.mark.parametrize("compile_backend", ["torch-compile", "torch-opt", "torch-cudagraph"]) def test_trtllm_bench(llm_root, compile_backend): # noqa: F811 model_path_or_name, model_name, model_path = tiny_llama_details()