Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
75101b6
fix: wip
mohammedabdulwahhab Aug 11, 2025
1323498
fix: fix
mohammedabdulwahhab Aug 11, 2025
78ca1cc
fix: fix
mohammedabdulwahhab Aug 11, 2025
8514e2a
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into mabdu…
mohammedabdulwahhab Aug 11, 2025
903baf3
fix: refactor main component type to frontend
mohammedabdulwahhab Aug 11, 2025
86759aa
fix: tests partially fixed
mohammedabdulwahhab Aug 12, 2025
045bae6
fix: parameterize component factory with single vs multinode and fix …
mohammedabdulwahhab Aug 12, 2025
bac8c3f
fix: update vllm yamls to use defaults
mohammedabdulwahhab Aug 12, 2025
b7fb92c
fix: update sglang yamls
mohammedabdulwahhab Aug 12, 2025
a618c0c
fix: trtllm yamls
mohammedabdulwahhab Aug 12, 2025
c7d5ad4
fix: add planner component defaults
mohammedabdulwahhab Aug 12, 2025
97b96c5
fix: set planner defaults
mohammedabdulwahhab Aug 12, 2025
286069f
fix: ai lint yaml files
mohammedabdulwahhab Aug 12, 2025
a2f4110
fix: more tee removals
mohammedabdulwahhab Aug 12, 2025
2ebaf90
fix: more lint
mohammedabdulwahhab Aug 12, 2025
3a79d26
Update components/backends/vllm/deploy/disagg_planner.yaml
mohammedabdulwahhab Aug 12, 2025
fe9c153
fix: fix
mohammedabdulwahhab Aug 12, 2025
4cf9394
Merge branch 'mabdulwahhab/defaults' of https://github.com/ai-dynamo/…
mohammedabdulwahhab Aug 12, 2025
880d2b4
fix: fix merge conflicts
mohammedabdulwahhab Aug 12, 2025
b9f9c43
fix: remove backend param
mohammedabdulwahhab Aug 12, 2025
d3eb5d3
Apply suggestions from code review
mohammedabdulwahhab Aug 12, 2025
352a4e7
fix: remove multinode guard and fix tests
mohammedabdulwahhab Aug 12, 2025
04919b7
Merge branch 'mabdulwahhab/defaults' of https://github.com/ai-dynamo/…
mohammedabdulwahhab Aug 12, 2025
1a58890
fix: fix role
mohammedabdulwahhab Aug 12, 2025
e84d253
fix: planner should add a service account
mohammedabdulwahhab Aug 13, 2025
cbd90e9
fix: add startup probe overrides, add checkMainContainerOverrides
mohammedabdulwahhab Aug 13, 2025
042092e
fix: restore prometheus comp in disagg_planner to use componentType f…
mohammedabdulwahhab Aug 13, 2025
0a738a8
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into mabdu…
mohammedabdulwahhab Aug 13, 2025
66dbc51
fix: update prometheus for sglang as well
mohammedabdulwahhab Aug 13, 2025
d5f6b2d
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into mabdu…
mohammedabdulwahhab Aug 13, 2025
1a05dab
fix: remove validate main container
mohammedabdulwahhab Aug 14, 2025
bf8db83
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into mabdu…
mohammedabdulwahhab Aug 14, 2025
e54b451
fix: fix sglang disagg planner
mohammedabdulwahhab Aug 14, 2025
20e84e5
Apply suggestions from code review
mohammedabdulwahhab Aug 14, 2025
117b0ce
Merge branch 'main' of https://github.com/ai-dynamo/dynamo into mabdu…
mohammedabdulwahhab Aug 14, 2025
7aa3627
Merge branch 'mabdulwahhab/defaults' of https://github.com/ai-dynamo/…
mohammedabdulwahhab Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: trtllm yamls
  • Loading branch information
mohammedabdulwahhab committed Aug 12, 2025
commit a618c0ce49c13f48639127dfeb6a122911509063
44 changes: 3 additions & 41 deletions components/backends/trtllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,8 @@ spec:
Frontend:
dynamoNamespace: trtllm-agg
componentType: frontend
livenessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 3
failureThreshold: 10
replicas: 1
# Override default resources
resources:
requests:
cpu: "5"
Expand All @@ -48,23 +30,10 @@ spec:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMWorker:
envFromSecret: hf-token-secret
livenessProbe:
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 60
dynamoNamespace: trtllm-agg
componentType: worker
replicas: 1
# Override default resources
resources:
requests:
cpu: "10"
Expand All @@ -74,13 +43,6 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
Expand All @@ -101,4 +63,4 @@ spec:
- "--served-model-name"
- "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
- "--extra-engine-args"
- "engine_configs/agg.yaml"
- "engine_configs/agg.yaml"
50 changes: 2 additions & 48 deletions components/backends/trtllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,9 @@ metadata:
spec:
services:
Frontend:
livenessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 3
failureThreshold: 5
dynamoNamespace: trtllm-agg-router
componentType: frontend
replicas: 1
resources:
requests:
cpu: "1"
memory: "2Gi"
limits:
cpu: "1"
memory: "2Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
Expand All @@ -48,23 +22,10 @@ spec:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
TRTLLMWorker:
envFromSecret: hf-token-secret
livenessProbe:
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 60
dynamoNamespace: trtllm-agg-router
componentType: worker
replicas: 2
# Override default resources
resources:
requests:
cpu: "10"
Expand All @@ -74,13 +35,6 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
Expand All @@ -102,4 +56,4 @@ spec:
- "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
- "--extra-engine-args"
- "engine_configs/agg.yaml"
- "--publish-events-and-metrics"
- "--publish-events-and-metrics"
66 changes: 4 additions & 62 deletions components/backends/trtllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,8 @@ spec:
Frontend:
dynamoNamespace: trtllm-disagg
componentType: frontend
livenessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 3
failureThreshold: 10
replicas: 1
# Override default resources
resources:
requests:
cpu: "5"
Expand All @@ -51,20 +33,7 @@ spec:
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
livenessProbe:
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 60
# Override default resources
resources:
requests:
cpu: "10"
Expand All @@ -90,32 +59,12 @@ spec:
- -c
args:
- "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first 2>&1 | tee /tmp/trtllm.log"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
livenessProbe:
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 60
# Override default resources
resources:
requests:
cpu: "10"
Expand All @@ -140,11 +89,4 @@ spec:
- /bin/sh
- -c
args:
- "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first 2>&1 | tee /tmp/trtllm.log"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
- "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first 2>&1 | tee /tmp/trtllm.log"
66 changes: 4 additions & 62 deletions components/backends/trtllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,8 @@ spec:
Frontend:
dynamoNamespace: trtllm-v1-disagg-router
componentType: frontend
livenessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 3
failureThreshold: 10
replicas: 1
# Override default resources
resources:
requests:
cpu: "5"
Expand All @@ -51,20 +33,7 @@ spec:
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
livenessProbe:
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 60
# Override default resources
resources:
requests:
cpu: "10"
Expand All @@ -90,32 +59,12 @@ spec:
- -c
args:
- "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy prefill_first --publish-events-and-metrics 2>&1 | tee /tmp/trtllm.log"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
livenessProbe:
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 60
# Override default resources
resources:
requests:
cpu: "10"
Expand All @@ -140,11 +89,4 @@ spec:
- /bin/sh
- -c
args:
- "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy prefill_first 2>&1 | tee /tmp/trtllm.log"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
- "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy prefill_first 2>&1 | tee /tmp/trtllm.log"