Waive failed tests on main branch

Signed-off-by: qqiao <qqiao@nvidia.com>
NVIDIA · chzblych · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025
commit cf8ca7be898bdffe3c09aad69c5f3dd5c5558a08
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -321,3 +321,9 @@ full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108)
 test_e2e.py::test_multi_nodes_eval[llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-tp8pp2-mmlu] SKIP (https://nvbugs/5473781)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5476580)
+disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5477404)
+triton_server/test_triton.py::test_python_bls_unit_tests[python-bls-unit-tests] SKIP (https://nvbugs/5477392)
+triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5477399)
+triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
+examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -8,6 +8,7 @@
 from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig
 
 
+@pytest.mark.skip(reason="https://nvbugs/5418673")
 @pytest.mark.parametrize(
     "model_name",
     ["Llama-4-Maverick-17B-128E-Instruct", "Llama-4-Scout-17B-16E-Instruct"],

diff --git a/tests/unittest/llmapi/apps/_test_openai_chat.py b/tests/unittest/llmapi/apps/_test_openai_chat.py
@@ -14,6 +14,11 @@
 from .utils import (invalid_logit_bias_helper, logit_bias_effect_helper,
                     make_server_with_custom_sampler_fixture)
 
-pytestmark = pytest.mark.threadleak(enabled=False)
+# pytest reads a single module-level `pytestmark`; list both marks so the
+# skip is not overwritten by a second assignment.
+pytestmark = [
+    pytest.mark.skip(reason="https://nvbugs/5477444"),
+    pytest.mark.threadleak(enabled=False),
+]
 
 

diff --git a/tests/unittest/llmapi/test_executor.py b/tests/unittest/llmapi/test_executor.py
@@ -277,6 +277,7 @@ def create_rsp(id, finished: bool = False):
     return tllm.Response(request_id=0, result=result, client_id=0)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     result = GenerationResultBase(
@@ -291,6 +292,7 @@ def test_GenerationResultBase():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResult():
     request = GenerationRequest(prompt_token_ids=[12, 23, 34],
                                 sampling_params=SamplingParams(max_tokens=4))
@@ -303,6 +305,7 @@ def test_GenerationResult():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_DetokenizedGenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     model_path = llm_models_root() / "llama-models/llama-7b-hf"
@@ -434,6 +437,7 @@ def ResponsePostprocessWorker_worker_task(pull_pipe_addr, push_pipe_addr,
     worker.start()
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477369")
 def test_ResponsePostprocessWorker():
 
     input_pipe = ZeroMqQueue(is_server=True)