diff --git a/recipes/README.md b/recipes/README.md index b8c6981dc4..0e58eeb10b 100644 --- a/recipes/README.md +++ b/recipes/README.md @@ -21,6 +21,8 @@ This repository contains production-ready recipes for deploying large language m | deepseek-r1 | sglang | disagg (1 node, wide-ep) | 8x H200 | ✅ | 🚧 |🚧 | | deepseek-r1 | sglang | disagg (multi-node, wide-ep) | 16x H200 | ✅ | 🚧 |🚧 | | gpt-oss-120b | trtllm | agg | 4x GB200 | ✅ | ✅ |🚧 | +| Qwen3-235B-A22B | trtllm | agg | 16x H200 | ✅ | ✅ |🚧 | +| Qwen3-235B-A22B | trtllm | disagg | 16x H200 | ✅ | ✅ |🚧 | **Legend:** - ✅ Functional @@ -294,4 +296,4 @@ kubectl wait --for=condition=Complete job/$PERF_JOB_NAME -n $NAMESPACE --timeout ```bash # Check final benchmark results kubectl logs job/$PERF_JOB_NAME -n $NAMESPACE | tail -50 -``` \ No newline at end of file +``` diff --git a/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml b/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml index cfaa1749c7..db44349e7f 100644 --- a/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml +++ b/recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml @@ -128,8 +128,7 @@ spec: --model-path "${MODEL_PATH}" \ --served-model-name "Qwen/Qwen3-235B-A22B-FP8" \ --extra-engine-args "${ENGINE_ARGS}" \ - --disaggregation-mode prefill \ - --disaggregation-strategy prefill_first + --disaggregation-mode prefill volumeMounts: - name: prefill-config mountPath: /engine_configs @@ -180,8 +179,7 @@ spec: --model-path "${MODEL_PATH}" \ --served-model-name "Qwen/Qwen3-235B-A22B-FP8" \ --extra-engine-args "${ENGINE_ARGS}" \ - --disaggregation-mode decode \ - --disaggregation-strategy prefill_first + --disaggregation-mode decode volumeMounts: - name: decode-config mountPath: /engine_configs