diff --git a/container/Dockerfile.tensorrt_llm b/container/Dockerfile.tensorrt_llm
index 2605fa3471..2293b6ffb7 100644
--- a/container/Dockerfile.tensorrt_llm
+++ b/container/Dockerfile.tensorrt_llm
@@ -373,12 +373,25 @@ CMD []
 
 FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
 
+WORKDIR /workspace
+
 ARG ARCH_ALT
 
-WORKDIR /workspace
 ENV DYNAMO_HOME=/workspace
 ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
+ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
+ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
+ENV LD_LIBRARY_PATH=\
+$NIXL_LIB_DIR:\
+$NIXL_PLUGIN_DIR:\
+/usr/local/ucx/lib:\
+/usr/local/ucx/lib/ucx:\
+/opt/hpcx/ompi/lib:\
+$LD_LIBRARY_PATH
+ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
+ENV OPAL_PREFIX=/opt/hpcx/ompi
 
 # Install apt dependencies
 # openssh-client, openssh-server are needed for OpenMPI
@@ -473,21 +486,6 @@ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
 COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
 
-# Setup environment variables
-ARG ARCH_ALT
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
-
-ENV LD_LIBRARY_PATH=\
-$NIXL_LIB_DIR:\
-$NIXL_PLUGIN_DIR:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-/opt/hpcx/ompi/lib:\
-$LD_LIBRARY_PATH
-ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
-ENV OPAL_PREFIX=/opt/hpcx/ompi
 
 # Install TensorRT-LLM (same as in build stage)
 ARG HAS_TRTLLM_CONTEXT=0
@@ -496,8 +494,8 @@ ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
 
 # Copy Dynamo wheels into wheelhouse
 # Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
-COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/
-COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
+COPY --from=dev /workspace/wheels/nixl/*.whl /workspace/wheelhouse/
+COPY --from=wheel_builder /workspace/dist/*.whl /workspace/wheelhouse/
 COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
 
 # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
@@ -508,7 +506,7 @@ RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_P
     if [ "$ARCH" = "amd64" ]; then \
         pip install "triton==3.3.1"; \
     fi; \
-    uv pip install ai-dynamo nixl --find-links wheelhouse
+    uv pip install ai-dynamo nixl --find-links /workspace/wheelhouse
 
 # Setup TRTLLM environment variables, same as in dev image
 ENV TRTLLM_USE_UCX_KVCACHE=1
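
A minimal smoke test for the relocated runtime ENV block and the absolute wheelhouse path, sketched under assumptions: the runtime stage builds locally with the repo's usual build args, and the dynamo-trtllm-runtime tag is a placeholder, not something defined in this diff.

# Build only the runtime stage (tag and any required --build-arg values are placeholders).
docker build -f container/Dockerfile.tensorrt_llm --target runtime \
    -t dynamo-trtllm-runtime .

# The NIXL/UCX/OMPI variables are now set near the top of the runtime stage,
# so any shell in the image should already see them, and the wheels installed
# from /workspace/wheelhouse should be visible to pip:
docker run --rm dynamo-trtllm-runtime bash -c '
    echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
    ls "$NIXL_PLUGIN_DIR" && \
    pip show ai-dynamo nixl | grep -E "^(Name|Version)"
'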