69 changes: 38 additions & 31 deletions container/Dockerfile.trtllm
@@ -408,12 +408,14 @@ RUN uv venv $VIRTUAL_ENV --python 3.12 && \
 # Common dependencies
 # TODO: Remove extra install and use pyproject.toml to define all dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
+    uv pip install --no-cache --requirement /tmp/requirements.txt && \
+    echo "uninstall (networkx packaging torch triton) as we will use NVIDIA's versions later" && \
+    uv pip uninstall networkx packaging torch triton

 # Install test dependencies
 # TODO: Remove this once we have a functional CI image built on top of the runtime image
 RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
+    uv pip install --no-cache --requirement /tmp/requirements.txt

 # Copy CUDA toolkit components needed for nvcc, cudafe, cicc etc.
 COPY --from=build /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
@@ -435,29 +437,31 @@ ARG SYMPY_VER=1.14.0
 ARG PACKAGING_VER=23.2
 ARG FLASH_ATTN_VER=2.7.3
 ARG MPMATH_VER=1.3.0
+RUN uv pip list # TODO remove this line
 COPY --from=build /usr/local/lib/lib* /usr/local/lib/
-COPY --from=build /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
-COPY --from=build /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
-COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
-COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
-COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
-COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
-COPY --from=build /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
-COPY --from=build /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
-COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
-COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
-COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
-COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
-COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
+COPY --from=build /usr/local/lib/python3.12/dist-packages/torch /opt/dynamo/venv/lib/python3.12/site-packages/torch
+COPY --from=build /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/torch-${TORCH_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/torchgen /opt/dynamo/venv/lib/python3.12/site-packages/torchgen
+COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision /opt/dynamo/venv/lib/python3.12/site-packages/torchvision
+COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/torchvision-${TORCHVISION_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision.libs /opt/dynamo/venv/lib/python3.12/site-packages/torchvision.libs
+COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools /opt/dynamo/venv/lib/python3.12/site-packages/setuptools
+COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/setuptools-${SETUPTOOLS_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/functorch /opt/dynamo/venv/lib/python3.12/site-packages/functorch
+COPY --from=build /usr/local/lib/python3.12/dist-packages/triton /opt/dynamo/venv/lib/python3.12/site-packages/triton
+COPY --from=build /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2 /opt/dynamo/venv/lib/python3.12/site-packages/jinja2
+COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/jinja2-${JINJA2_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx /opt/dynamo/venv/lib/python3.12/site-packages/networkx
+COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/networkx-${NETWORKX_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy /opt/dynamo/venv/lib/python3.12/site-packages/sympy
+COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/sympy-${SYMPY_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging /opt/dynamo/venv/lib/python3.12/site-packages/packaging
+COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/packaging-${PACKAGING_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /opt/dynamo/venv/lib/python3.12/site-packages/flash_attn
+COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
+COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /opt/dynamo/venv/lib/python3.12/site-packages/
+RUN uv pip list # TODO remove this line
Comment on lines 441 to +464
🛠️ Refactor suggestion

Brittle wholesale copy from system dist-packages into venv; reduce surface to essentials and avoid ABI drift

Copying system-wide libs and many third-party packages (setuptools, packaging, networkx, sympy, jinja2, triton) into the venv is fragile:

  • Hard-coded dist-info versions will break when the base image updates.
  • /usr/local/lib/lib* can clobber runtime CUDA/OS libs and cause subtle ABI issues.
  • Mixing copied files with uv-managed packages makes upgrades, uninstalls, and debugging harder.

Prefer copying only the PyTorch stack you explicitly need (torch/torchgen/torchvision/functorch and flash_attn artifacts) and let uv manage generic Python deps. Also avoid copying triton from NGC if you intend to pin a specific triton for TRT-LLM later.

Slim the COPY set like this:

-RUN uv pip list # TODO remove this line
-COPY --from=build /usr/local/lib/lib* /usr/local/lib/
 COPY --from=build /usr/local/lib/python3.12/dist-packages/torch /opt/dynamo/venv/lib/python3.12/site-packages/torch
 COPY --from=build /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/torch-${TORCH_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/torchgen /opt/dynamo/venv/lib/python3.12/site-packages/torchgen
 COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision /opt/dynamo/venv/lib/python3.12/site-packages/torchvision
 COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/torchvision-${TORCHVISION_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/torchvision.libs /opt/dynamo/venv/lib/python3.12/site-packages/torchvision.libs
-COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools /opt/dynamo/venv/lib/python3.12/site-packages/setuptools
-COPY --from=build /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/setuptools-${SETUPTOOLS_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/functorch /opt/dynamo/venv/lib/python3.12/site-packages/functorch
-COPY --from=build /usr/local/lib/python3.12/dist-packages/triton /opt/dynamo/venv/lib/python3.12/site-packages/triton
-COPY --from=build /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2 /opt/dynamo/venv/lib/python3.12/site-packages/jinja2
-COPY --from=build /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/jinja2-${JINJA2_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx /opt/dynamo/venv/lib/python3.12/site-packages/networkx
-COPY --from=build /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/networkx-${NETWORKX_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy /opt/dynamo/venv/lib/python3.12/site-packages/sympy
-COPY --from=build /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/sympy-${SYMPY_VER}.dist-info
-COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging /opt/dynamo/venv/lib/python3.12/site-packages/packaging
-COPY --from=build /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/packaging-${PACKAGING_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /opt/dynamo/venv/lib/python3.12/site-packages/flash_attn
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /opt/dynamo/venv/lib/python3.12/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /opt/dynamo/venv/lib/python3.12/site-packages/
-RUN uv pip list # TODO remove this line

If you want to future-proof the dist-info names, switch to a single RUN cp -a inside the runtime stage that uses wildcard globs (e.g., torch*, torchvision*, flash_attn*) rather than hard-coded versions.
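For illustration, a minimal sketch of that glob-based approach, assuming BuildKit bind mounts are available (the file already uses them for the requirements installs) and that the venv lives at /opt/dynamo/venv; the package list is illustrative, not exhaustive:

# Sketch only: copy the minimal PyTorch stack via shell globs so dist-info
# version bumps in the base image don't require editing hard-coded versions.
RUN --mount=type=bind,from=build,source=/usr/local/lib/python3.12/dist-packages,target=/mnt/dist-packages \
    cp -a /mnt/dist-packages/torch \
          /mnt/dist-packages/torch-*.dist-info \
          /mnt/dist-packages/torchgen \
          /mnt/dist-packages/torchvision \
          /mnt/dist-packages/torchvision-*.dist-info \
          /mnt/dist-packages/torchvision.libs \
          /mnt/dist-packages/functorch \
          /mnt/dist-packages/flash_attn \
          /mnt/dist-packages/flash_attn-*.dist-info \
          /mnt/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so \
          /opt/dynamo/venv/lib/python3.12/site-packages/

Because the bind mount is read-only and discarded after the RUN, this avoids the extra image layer a staging COPY would create.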

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In container/Dockerfile.trtllm around lines 441-464, the PR currently copies a
large set of system dist-packages and /usr/local/lib/lib* into the venv using
hard-coded dist-info names, which is brittle and risks ABI drift; replace those
COPY lines so you only bring the PyTorch runtime artifacts the image actually
needs (torch, torchgen, torchvision, functorch, flash_attn and the compiled .so
artifacts) and stop copying generic packages (setuptools, networkx, sympy,
packaging, jinja2, triton, etc.) or system libs. Implement this by removing the
/usr/local/lib/lib* COPY and the hard-coded dist-info COPYs, and instead either
(A) use wildcarded COPY/RUN cp -a inside the runtime stage to copy torch*
torchvision* functorch flash_attn and their associated .so files and dist-info
globs, or (B) only copy explicit package directories for the minimal PyTorch set
and let uv handle generic Python deps; also avoid copying triton from NGC unless
you intend to pin it, and remove the temporary "uv pip list" RUN line.



 # Install TensorRT-LLM (same as in build stage)
@@ -478,19 +482,22 @@ COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
 # NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc4. This
 # can be removed after https://github.com/NVIDIA/TensorRT-LLM/pull/6703 is merged
 # we upgrade to a published pip wheel containing this change.
-RUN uv pip install "cuda-python>=12,<13" && \
-    uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
-    if [ "$ARCH" = "amd64" ]; then \
-        pip install "triton==3.3.1"; \
-    fi; \
-    uv pip install /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl /workspace/wheelhouse/ai_dynamo*any.whl /workspace/wheelhouse/nixl*.whl
+RUN uv pip list # TODO remove this line
+RUN uv pip install --no-cache "cuda-python>=12,<13" && \
+    uv pip install --no-cache --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
+    uv pip install --no-cache \
+        /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
+        /workspace/wheelhouse/ai_dynamo*any.whl \
+        /workspace/wheelhouse/nixl*.whl
+RUN uv pip list # TODO remove this line

Comment on lines +485 to 493
⚠️ Potential issue

Parity with build stage: ensure compatible triton for TRT-LLM on amd64

In build you pin triton==3.3.1 for amd64, but in runtime you don’t. Given TRT-LLM’s sensitivity to triton versions, this can lead to import/runtime failures if the copied PyTorch triton doesn’t match. Align runtime with build by installing triton==3.3.1 on amd64 after TRT-LLM.

Apply this diff:

-RUN uv pip list # TODO remove this line
-RUN uv pip install --no-cache "cuda-python>=12,<13" && \
-    uv pip install --no-cache --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
-    uv pip install --no-cache \
+RUN uv pip install --no-cache "cuda-python>=12,<13" && \
+    uv pip install --no-cache --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
+    if [ "$ARCH" = "amd64" ]; then uv pip install --no-cache "triton==3.3.1"; fi && \
+    uv pip install --no-cache \
         /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
         /workspace/wheelhouse/ai_dynamo*any.whl \
         /workspace/wheelhouse/nixl*.whl
-RUN uv pip list # TODO remove this line
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
-RUN uv pip list # TODO remove this line
-RUN uv pip install --no-cache "cuda-python>=12,<13" && \
-    uv pip install --no-cache --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
-    uv pip install --no-cache \
-        /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
-        /workspace/wheelhouse/ai_dynamo*any.whl \
-        /workspace/wheelhouse/nixl*.whl
-RUN uv pip list # TODO remove this line
+RUN uv pip install --no-cache "cuda-python>=12,<13" && \
+    uv pip install --no-cache --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
+    if [ "$ARCH" = "amd64" ]; then uv pip install --no-cache "triton==3.3.1"; fi && \
+    uv pip install --no-cache \
+        /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
+        /workspace/wheelhouse/ai_dynamo*any.whl \
+        /workspace/wheelhouse/nixl*.whl
🤖 Prompt for AI Agents
In container/Dockerfile.trtllm around lines 485 to 493, the runtime stage does
not pin triton while the build stage pins triton==3.3.1 for amd64, which can
cause import/runtime failures; add a conditional install right after the TRT-LLM
pip installs that, when building for amd64, runs pip install --no-cache
triton==3.3.1 (using the same uv pip invocation pattern and environment) so the
runtime triton version matches the build; implement the condition using the
existing architecture/build-arg mechanism in the Dockerfile (e.g., check
TARGETARCH or similar) and keep the command order so triton is installed after
the TRT-LLM wheels are installed.
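For reference, a sketch of the TARGETARCH route the prompt mentions, assuming the same uv venv is active; TARGETARCH is a built-in BuildKit platform arg ("amd64", "arm64", ...) and must be re-declared in the stage to be visible:

# Sketch only: pin triton on amd64 to match the build stage, installing it
# after TRT-LLM and before the wheelhouse wheels.
ARG TARGETARCH
RUN uv pip install --no-cache "cuda-python>=12,<13" && \
    uv pip install --no-cache --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
    if [ "$TARGETARCH" = "amd64" ]; then \
        uv pip install --no-cache "triton==3.3.1"; \
    fi && \
    uv pip install --no-cache \
        /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
        /workspace/wheelhouse/ai_dynamo*any.whl \
        /workspace/wheelhouse/nixl*.whl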

 # Copy benchmarks, backends and tests for CI
 # TODO: Remove this once we have a functional CI image built on top of the runtime image
 COPY tests /workspace/tests
 COPY benchmarks /workspace/benchmarks
 COPY components/backends/trtllm /workspace/components/backends/trtllm
-RUN uv pip install /workspace/benchmarks
+RUN uv pip install --no-cache /workspace/benchmarks
+RUN uv pip list # TODO remove this line

 # Copy files for legal compliance
 COPY ATTRIBUTION* LICENSE /workspace/