From 9188830882f742d7681b7591ae8b219a88b902c9 Mon Sep 17 00:00:00 2001 From: qgai Date: Wed, 12 Nov 2025 09:05:52 +0000 Subject: [PATCH 1/3] try to reproduce /nvbug/5461796_v2 Signed-off-by: qgai --- examples/llm-api/llm_speculative_decoding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/llm-api/llm_speculative_decoding.py b/examples/llm-api/llm_speculative_decoding.py index 8048bede8ee..bbce4062973 100644 --- a/examples/llm-api/llm_speculative_decoding.py +++ b/examples/llm-api/llm_speculative_decoding.py @@ -26,7 +26,6 @@ def run_MTP(model: Optional[str] = None): model=model or "nvidia/DeepSeek-R1-FP4", speculative_config=spec_config, ) - for prompt in prompts: response = llm.generate(prompt, SamplingParams(max_tokens=10)) print(response.outputs[0].text) From f49c1b2a44ff8e4f6274591fe1307bb81764a09d Mon Sep 17 00:00:00 2001 From: qgai Date: Tue, 18 Nov 2025 06:11:13 +0000 Subject: [PATCH 2/3] unwaive and extend time for test_llmapi_speculative_decoding_mtp Signed-off-by: qgai --- tests/integration/test_lists/test-db/l0_sanity_check.yml | 2 +- tests/integration/test_lists/waives.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_lists/test-db/l0_sanity_check.yml b/tests/integration/test_lists/test-db/l0_sanity_check.yml index a5d6ee88d8e..8150d5ccb98 100644 --- a/tests/integration/test_lists/test-db/l0_sanity_check.yml +++ b/tests/integration/test_lists/test-db/l0_sanity_check.yml @@ -25,7 +25,7 @@ l0_sanity_check: - llmapi/test_llm_examples.py::test_llmapi_example_multilora - llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding - llmapi/test_llm_examples.py::test_llmapi_example_logits_processor - - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp + - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90) - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3 - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram - 
llmapi/test_llm_examples.py::test_llmapi_sampling diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index b027ffea425..e3ed715c285 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -344,4 +344,3 @@ examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https:// cpp/test_e2e.py::test_benchmarks[t5-90] SKIP (https://nvbugs/5630196) full:RTX_PRO_6000_Blackwell_Server_Edition/accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5569696) accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb SKIP (https://nvbugs/5640940) -llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp SKIP (https://nvbugs/5461796) From d28b305f4503efe6320745a932dece8fc122b898 Mon Sep 17 00:00:00 2001 From: qgai Date: Tue, 18 Nov 2025 06:16:42 +0000 Subject: [PATCH 3/3] restore blank line Signed-off-by: qgai --- examples/llm-api/llm_speculative_decoding.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/llm-api/llm_speculative_decoding.py b/examples/llm-api/llm_speculative_decoding.py index bbce4062973..8048bede8ee 100644 --- a/examples/llm-api/llm_speculative_decoding.py +++ b/examples/llm-api/llm_speculative_decoding.py @@ -26,6 +26,7 @@ def run_MTP(model: Optional[str] = None): model=model or "nvidia/DeepSeek-R1-FP4", speculative_config=spec_config, ) + for prompt in prompts: response = llm.generate(prompt, SamplingParams(max_tokens=10)) print(response.outputs[0].text)