From 6cd10daebeabd0ce0c3c1ed495b32b704dcbed01 Mon Sep 17 00:00:00 2001 From: Elnifio Date: Mon, 17 Nov 2025 10:40:25 -0800 Subject: [PATCH 1/2] removes unnecessary strategy Signed-off-by: Elnifio --- recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml b/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml index cfaa1749c7..db44349e7f 100644 --- a/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml +++ b/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml @@ -128,8 +128,7 @@ spec: --model-path "${MODEL_PATH}" \ --served-model-name "Qwen/Qwen3-235B-A22B-FP8" \ --extra-engine-args "${ENGINE_ARGS}" \ - --disaggregation-mode prefill \ - --disaggregation-strategy prefill_first + --disaggregation-mode prefill volumeMounts: - name: prefill-config mountPath: /engine_configs @@ -180,8 +179,7 @@ spec: --model-path "${MODEL_PATH}" \ --served-model-name "Qwen/Qwen3-235B-A22B-FP8" \ --extra-engine-args "${ENGINE_ARGS}" \ - --disaggregation-mode decode \ - --disaggregation-strategy prefill_first + --disaggregation-mode decode volumeMounts: - name: decode-config mountPath: /engine_configs From fdc4659d81e9e5cf6d126da4ff74f4aa0f8f521a Mon Sep 17 00:00:00 2001 From: Elnifio Date: Mon, 17 Nov 2025 10:42:04 -0800 Subject: [PATCH 2/2] updates README Signed-off-by: Elnifio --- recipes/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipes/README.md b/recipes/README.md index b8c6981dc4..0e58eeb10b 100644 --- a/recipes/README.md +++ b/recipes/README.md @@ -21,6 +21,8 @@ This repository contains production-ready recipes for deploying large language m | deepseek-r1 | sglang | disagg (1 node, wide-ep) | 8x H200 | ✅ | 🚧 |🚧 | | deepseek-r1 | sglang | disagg (multi-node, wide-ep) | 16x H200 | ✅ | 🚧 |🚧 | | gpt-oss-120b | trtllm | agg | 4x GB200 | ✅ | ✅ |🚧 | +| Qwen3-235B-A22B | trtllm | agg | 16x H200 | ✅ | ✅ |🚧 | +| Qwen3-235B-A22B | trtllm | disagg | 16x H200 | ✅ | ✅ |🚧 | **Legend:** - ✅ Functional @@ -294,4 +296,4 @@ kubectl wait --for=condition=Complete job/$PERF_JOB_NAME -n $NAMESPACE --timeout ```bash # Check final benchmark results kubectl logs job/$PERF_JOB_NAME -n $NAMESPACE | tail -50 -``` \ No newline at end of file +```