Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion container/Dockerfile.vllm
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ ARG TORCH_BACKEND="cu128"
# DeepGEMM is pinned to the commit before https://github.com/deepseek-ai/DeepGEMM/pull/112, which seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="03d0be3"
ARG FLASHINF_REF="v0.2.8rc1"
ARG FLASHINF_REF="v0.2.8.rc1"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
Expand Down
14 changes: 5 additions & 9 deletions container/deps/vllm/install_vllm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,10 @@ python setup.py install


# Install FlashInfer
if [ "$ARCH" = "arm64" ]; then
uv pip install flashinfer-python
else
cd $INSTALLATION_DIR
git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
cd flashinfer
git checkout $FLASHINF_REF
python -m pip install -v .
fi
cd $INSTALLATION_DIR
git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
cd flashinfer
git checkout $FLASHINF_REF
uv pip install -v .

echo "vllm installation completed successfully"
Loading