diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index 2cff5f6af67..08ab7dbc83e 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -2024,7 +2024,6 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
         task = MMLU(self.MODEL_NAME)
         task.evaluate(llm)
 
-    @pytest.mark.skip_less_device_memory(140000)  ## OOM on 80G H100
     @parametrize_with_ids("eagle3_one_model", [True, False])
     @parametrize_with_ids("enable_chunked_prefill", [False, True])
     def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):
@@ -2032,7 +2031,10 @@ def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):
             disable_overlap_scheduler=True,
             cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
         )
-        kv_cache_config = KvCacheConfig(enable_block_reuse=False)
+        kv_cache_config = KvCacheConfig(
+            enable_block_reuse=False,
+            free_gpu_memory_fraction=0.6,
+        )
         eagle_model_dir = f"{llm_models_root()}/Qwen3/qwen3_8b_eagle3"
         target_model_dir = f"{llm_models_root()}/Qwen3/Qwen3-8B"
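
For context, `free_gpu_memory_fraction` bounds how much of the remaining GPU memory the KV-cache pool may claim, so capping it at 0.6 leaves headroom for the EAGLE3 draft model alongside the target model, which is why the 140 GB `skip_less_device_memory` marker can be dropped. A minimal sketch of the resulting config, assuming the `tensorrt_llm.llmapi` API already imported in this test file (the default-fraction remark in the comment is an inference, not part of the patch):

```python
# Sketch only: mirrors the KV-cache setup the diff lands on.
from tensorrt_llm.llmapi import KvCacheConfig

kv_cache_config = KvCacheConfig(
    enable_block_reuse=False,      # unchanged: no KV block reuse in this test
    # Cap the KV-cache pool at 60% of free GPU memory (below the usual
    # default) so the EAGLE3 draft model, target model, and CUDA graphs
    # fit together on an 80G H100 instead of skipping the test there.
    free_gpu_memory_fraction=0.6,
)
```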