Skip to content

Commit 47d05d7

Browse files
build: DIS-148 use the tensorrt_llm public wheel from pypi by default in container build (#1525)
1 parent ce48a86 commit 47d05d7

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

container/build.sh

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,15 @@ TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
8888
# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
8989
# Important Note: This commit is not used in our CI pipeline. See the CI
9090
# variables to learn how to run a pipeline with a specific commit.
91-
TRTLLM_COMMIT="137fe35539ea182f1495f5021bfda97c729e50c3"
91+
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="137fe35539ea182f1495f5021bfda97c729e50c3"
92+
TRTLLM_COMMIT=""
9293

9394
# TensorRT-LLM PyPI index URL
9495
TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
96+
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==0.21.0rc0"
9597
TENSORRTLLM_PIP_WHEEL=""
9698

9799

98-
99100
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
100101
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
101102
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
@@ -158,6 +159,13 @@ get_options() {
158159
missing_requirement "$1"
159160
fi
160161
;;
162+
--use-default-experimental-tensorrtllm-commit)
163+
if [ -n "$2" ] && [[ "$2" != --* ]]; then
164+
echo "ERROR: --use-default-experimental-tensorrtllm-commit does not take any argument"
165+
exit 1
166+
fi
167+
USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT=true
168+
;;
161169
--tensorrtllm-pip-wheel)
162170
if [ "$2" ]; then
163171
TENSORRTLLM_PIP_WHEEL=$2
@@ -344,6 +352,7 @@ show_help() {
344352
echo " [--framework framework one of ${!FRAMEWORKS[*]}]"
345353
echo " [--tensorrtllm-pip-wheel-dir path to tensorrtllm pip wheel directory]"
346354
echo " [--tensorrtllm-commit tensorrtllm commit to use for building the trtllm wheel if the wheel is not provided]"
355+
echo " [--use-default-experimental-tensorrtllm-commit] Use the default experimental commit (${DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT}) to build TensorRT-LLM. This is a flag (no argument). Do not combine with --tensorrtllm-commit or --tensorrtllm-pip-wheel."
347356
echo " [--tensorrtllm-pip-wheel tensorrtllm pip wheel on artifactory]"
348357
echo " [--tensorrtllm-index-url tensorrtllm PyPI index URL if providing the wheel from artifactory]"
349358
echo " [--build-arg additional build args to pass to docker build]"
@@ -475,6 +484,19 @@ check_wheel_file() {
475484
}
476485

477486
if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
487+
if [ "$USE_DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT" = true ]; then
488+
if [ -n "$TRTLLM_COMMIT" ] || [ -n "$TENSORRTLLM_PIP_WHEEL" ]; then
489+
# Name the flag exactly as it is spelled in the option parser and help text.
echo "ERROR: When using --use-default-experimental-tensorrtllm-commit, do not set --tensorrtllm-commit or --tensorrtllm-pip-wheel."
490+
exit 1
491+
fi
492+
TRTLLM_COMMIT="$DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT"
493+
fi
494+
495+
# If the user set neither a wheel nor a commit, use the default tensorrt_llm pip wheel
496+
if [ -z "$TENSORRTLLM_PIP_WHEEL" ] && [ -z "$TRTLLM_COMMIT" ]; then
497+
TENSORRTLLM_PIP_WHEEL="$DEFAULT_TENSORRTLLM_PIP_WHEEL"
498+
fi
499+
478500
if [ -z "${TENSORRTLLM_PIP_WHEEL}" ]; then
479501
# Use option 1
480502
if [ ! -d "${TENSORRTLLM_PIP_WHEEL_DIR}" ]; then

examples/tensorrt_llm/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ apt-get update && apt-get -y install git git-lfs
6262

6363
# On an ARM machine:
6464
./container/build.sh --framework tensorrtllm --platform linux/arm64
65+
66+
# Build the container with the default experimental TensorRT-LLM commit
67+
# WARNING: This is for experimental feature testing only.
68+
# The container should not be used in a production environment.
69+
./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit
6570
```
6671

6772
> [!NOTE]
@@ -136,6 +141,10 @@ dynamo serve graphs.agg:Frontend -f configs/deepseek_r1/mtp/mtp_agg.yaml
136141
```
137142

138143
Notes:
144+
- MTP is only available in a container built with the experimental TensorRT-LLM commit. Add `--use-default-experimental-tensorrtllm-commit` to the arguments of the `build.sh` script.
145+
146+
Example: `./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit`
147+
139148
- There is a noticeable latency for the first two inference requests. Please send warm-up requests before starting the benchmark.
140149
- MTP performance may vary depending on the acceptance rate of predicted tokens, which is dependent on the dataset or queries used while benchmarking. Additionally, `ignore_eos` should generally be omitted or set to `false` when using MTP to avoid speculating garbage outputs and getting unrealistic acceptance rates.
141150

@@ -275,6 +284,9 @@ dynamo serve components.prefill_worker:TensorRTLLMPrefillWorker -f configs/deeps
275284
```
276285

277286
Notes:
287+
- MTP is only available in a container built with the experimental TensorRT-LLM commit. Add `--use-default-experimental-tensorrtllm-commit` to the arguments of the `build.sh` script.
288+
289+
Example: `./container/build.sh --framework tensorrtllm --use-default-experimental-tensorrtllm-commit`
278290
- There is a noticeable latency for the first two inference requests. Please send warm-up requests before starting the benchmark.
279291
- MTP performance may vary depending on the acceptance rate of predicted tokens, which is dependent on the dataset or queries used while benchmarking. Additionally, `ignore_eos` should generally be omitted or set to `false` when using MTP to avoid speculating garbage outputs and getting unrealistic acceptance rates.
280292

0 commit comments

Comments
 (0)