Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 38 additions & 14 deletions container/Dockerfile.vllm
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,13 @@ WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
# TEMP: disable gds backend for arm64
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
cd build/ && \
ninja && \
ninja install; \
Expand Down Expand Up @@ -163,8 +164,10 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
# TEMP: disable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Ddisable_gds_backend=true" \
--config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
else \
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
Expand All @@ -177,22 +180,43 @@ RUN uv pip install /workspace/wheels/nixl/*.whl
# Install vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="059d4cd"
ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    # Common setup for both architectures: tooling, source checkout at the
    # pinned ref. Keeping this outside the arch branch avoids duplicating it.
    uv pip install pip cuda-python && \
    mkdir -p /opt/vllm && \
    cd /opt/vllm && \
    git clone https://github.com/vllm-project/vllm.git && \
    cd vllm && \
    git checkout $VLLM_REF && \
    if [ "$ARCH" = "arm64" ]; then \
        # arm64: no precompiled vllm wheels — build from source against the
        # PyTorch cu128 nightly (use_existing_torch.py makes the build reuse
        # the installed torch instead of pulling its own).
        uv pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 && \
        python use_existing_torch.py && \
        uv pip install -r requirements/build.txt && \
        MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation -e . -v; \
    else \
        # amd64: reuse the upstream precompiled kernels to skip the long build.
        VLLM_USE_PRECOMPILED=1 uv pip install -e .; \
    fi && \
    # Expert-parallel kernels + DeepGEMM, identical for both architectures.
    cd tools/ep_kernels && \
    bash install_python_libraries.sh && \
    cd ep_kernels_workspace && \
    git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
    cd DeepGEMM && \
    python setup.py install

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
Expand Down
2 changes: 2 additions & 0 deletions container/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,8 @@ ARCH="amd64"
# When targeting linux/arm64, switch the build to arm-specific settings.
# TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
case "$PLATFORM" in
    *"linux/arm64"*)
        ARCH="arm64"
        BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
        NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
        ;;
esac

# Update DOCKERFILE if framework is VLLM
Expand Down
Loading