You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
echo"ERROR: --use-default-experimental-tensorrtllm-commit does not take any argument"
165
+
exit 1
166
+
fi
167
+
USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT=true
168
+
;;
161
169
--tensorrtllm-pip-wheel)
162
170
if [ "$2" ];then
163
171
TENSORRTLLM_PIP_WHEEL=$2
@@ -344,6 +352,7 @@ show_help() {
344
352
echo" [--framework framework one of ${!FRAMEWORKS[*]}]"
345
353
echo" [--tensorrtllm-pip-wheel-dir path to tensorrtllm pip wheel directory]"
346
354
echo" [--tensorrtllm-commit tensorrtllm commit to use for building the trtllm wheel if the wheel is not provided]"
355
+
echo" [--use-default-experimental-tensorrtllm-commit] Use the default experimental commit (${DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT}) to build TensorRT-LLM. This is a flag (no argument). Do not combine with --tensorrtllm-commit or --tensorrtllm-pip-wheel."
347
356
echo" [--tensorrtllm-pip-wheel tensorrtllm pip wheel on artifactory]"
348
357
echo" [--tensorrtllm-index-url tensorrtllm PyPI index URL if providing the wheel from artifactory]"
349
358
echo" [--build-arg additional build args to pass to docker build]"
@@ -475,6 +484,19 @@ check_wheel_file() {
475
484
}
476
485
477
486
if [[ $FRAMEWORK=="TENSORRTLLM" ]];then
487
+
if [ "$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT"=true ];then
488
+
if [ -n"$TRTLLM_COMMIT" ] || [ -n"$TENSORRTLLM_PIP_WHEEL" ];then
489
+
echo"ERROR: When using --use-default-experimental-trtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."
@@ -136,6 +141,10 @@ dynamo serve graphs.agg:Frontend -f configs/deepseek_r1/mtp/mtp_agg.yaml
136
141
```
137
142
138
143
Notes:
144
+
- MTP is only available in a container built with the experimental TensorRT-LLM commit. To build such a container, pass `--use-default-experimental-tensorrtllm-commit` to the `build.sh` script.
- There is a noticeable latency for the first two inference requests. Please send warm-up requests before starting the benchmark.
140
149
- MTP performance may vary depending on the acceptance rate of predicted tokens, which is dependent on the dataset or queries used while benchmarking. Additionally, `ignore_eos` should generally be omitted or set to `false` when using MTP to avoid speculating garbage outputs and getting unrealistic acceptance rates.
141
150
@@ -275,6 +284,9 @@ dynamo serve components.prefill_worker:TensorRTLLMPrefillWorker -f configs/deeps
275
284
```
276
285
277
286
Notes:
287
+
- MTP is only available in a container built with the experimental TensorRT-LLM commit. To build such a container, pass `--use-default-experimental-tensorrtllm-commit` to the `build.sh` script.
- There is a noticeable latency for the first two inference requests. Please send warm-up requests before starting the benchmark.
279
291
- MTP performance may vary depending on the acceptance rate of predicted tokens, which is dependent on the dataset or queries used while benchmarking. Additionally, `ignore_eos` should generally be omitted or set to `false` when using MTP to avoid speculating garbage outputs and getting unrealistic acceptance rates.
0 commit comments