NVIDIA · LarryXFly · Aug 25, 2025 · Aug 20, 2025 · Aug 25, 2025
diff --git a/tests/integration/test_lists/qa/README.md b/tests/integration/test_lists/qa/README.md
@@ -32,7 +32,7 @@ Triton backend tests validate the integration with NVIDIA Triton Inference Serve
 The following Python packages are required for running QA tests:
 
 ```bash
-pip install mako oyaml rouge_score lm_eval
+pip3 install -r ${TENSORRT_LLM_PATH}/requirements-dev.txt
 ```
 
 ### Dependency Details
@@ -52,7 +52,8 @@ This directory contains various test configuration files:
 - `llm_function_nim.txt` - NIM-specific functional test cases
 - `llm_function_multinode.txt` - Multi-node functional test cases
 - `llm_function_gb20x.txt` - GB20X release test cases
-- `llm_function_rtx6kd.txt` - RTX 6000 Ada specific tests
+- `llm_function_rtx6kd.txt` - RTX 6000 series specific tests
+- `llm_function_l20.txt` - L20-specific tests; contains only single-GPU cases
 
 ### Performance Test Files
 - `llm_perf_full.yml` - Main performance test configuration

diff --git a/tests/integration/test_lists/qa/llm_function_l20.txt b/tests/integration/test_lists/qa/llm_function_l20.txt
@@ -0,0 +1,61 @@
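+# Covers single-GPU cases only. NOTE(review): the test_guided_decoding_4gpus entries below appear to need 4 GPUs; confirm they belong in this list.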
+accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_auto_dtype
+accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_weight_only
+accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_gather_generation_logits_cuda_graph
+accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_logprobs
+accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_auto_dtype
+accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8
+accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_auto_dtype
+accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_weight_only
+accuracy/test_llm_api.py::TestLlama3_1_8B::test_fp8_rowwise
+accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_fp8
+accuracy/test_llm_api.py::TestQwen2_5_0_5BInstruct::test_fp8
+accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_fp8
+accuracy/test_llm_api.py::TestQwen2_5_7BInstruct::test_fp8
+accuracy/test_llm_api.py::TestQwen2_5_7BInstruct::test_fp8_kvcache
+accuracy/test_llm_api.py::TestMistralNemo12B::test_auto_dtype
+accuracy/test_llm_api.py::TestMistralNemo12B::test_fp8
+
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=FLASHINFER]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[llguidance]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[llguidance]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[xgrammar]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance]
+
+accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_fp8
+accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
+accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
+accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype
+
+test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image-False]
+test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False]
+test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video-False]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image-False]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image-True]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True]
+test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[audio]
+test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image]
+test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image_audio]
+test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
+test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False]
+test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B]