From 9188830882f742d7681b7591ae8b219a88b902c9 Mon Sep 17 00:00:00 2001 From: qgai Date: Wed, 12 Nov 2025 09:05:52 +0000 Subject: [PATCH 1/3] try to reproduce /nvbug/5461796_v2 Signed-off-by: qgai --- examples/llm-api/llm_speculative_decoding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/llm-api/llm_speculative_decoding.py b/examples/llm-api/llm_speculative_decoding.py index 8048bede8ee..bbce4062973 100644 --- a/examples/llm-api/llm_speculative_decoding.py +++ b/examples/llm-api/llm_speculative_decoding.py @@ -26,7 +26,6 @@ def run_MTP(model: Optional[str] = None): model=model or "nvidia/DeepSeek-R1-FP4", speculative_config=spec_config, ) - for prompt in prompts: response = llm.generate(prompt, SamplingParams(max_tokens=10)) print(response.outputs[0].text) From f49c1b2a44ff8e4f6274591fe1307bb81764a09d Mon Sep 17 00:00:00 2001 From: qgai Date: Tue, 18 Nov 2025 06:11:13 +0000 Subject: [PATCH 2/3] unwaive and extend time for test_llmapi_speculative_decoding_mtp Signed-off-by: qgai --- tests/integration/test_lists/test-db/l0_sanity_check.yml | 2 +- tests/integration/test_lists/waives.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_lists/test-db/l0_sanity_check.yml b/tests/integration/test_lists/test-db/l0_sanity_check.yml index a5d6ee88d8e..8150d5ccb98 100644 --- a/tests/integration/test_lists/test-db/l0_sanity_check.yml +++ b/tests/integration/test_lists/test-db/l0_sanity_check.yml @@ -25,7 +25,7 @@ l0_sanity_check: - llmapi/test_llm_examples.py::test_llmapi_example_multilora - llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding - llmapi/test_llm_examples.py::test_llmapi_example_logits_processor - - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp + - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90) - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3 - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram - 
llmapi/test_llm_examples.py::test_llmapi_sampling diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index b027ffea425..e3ed715c285 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -344,4 +344,3 @@ examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https:// cpp/test_e2e.py::test_benchmarks[t5-90] SKIP (https://nvbugs/5630196) full:RTX_PRO_6000_Blackwell_Server_Edition/accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5569696) accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb SKIP (https://nvbugs/5640940) -llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp SKIP (https://nvbugs/5461796) From d28b305f4503efe6320745a932dece8fc122b898 Mon Sep 17 00:00:00 2001 From: qgai Date: Tue, 18 Nov 2025 06:16:42 +0000 Subject: [PATCH 3/3] restore blank line Signed-off-by: qgai --- examples/llm-api/llm_speculative_decoding.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/llm-api/llm_speculative_decoding.py b/examples/llm-api/llm_speculative_decoding.py index bbce4062973..8048bede8ee 100644 --- a/examples/llm-api/llm_speculative_decoding.py +++ b/examples/llm-api/llm_speculative_decoding.py @@ -26,6 +26,7 @@ def run_MTP(model: Optional[str] = None): model=model or "nvidia/DeepSeek-R1-FP4", speculative_config=spec_config, ) + for prompt in prompts: response = llm.generate(prompt, SamplingParams(max_tokens=10)) print(response.outputs[0].text)