diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
index bde2200daf..2ed198e22f 100644
--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -20,7 +20,7 @@ ARG TORCH_BACKEND="cu128"
 # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
 # "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
 ARG DEEPGEMM_REF="03d0be3"
-ARG FLASHINF_REF="v0.2.8rc1"
+ARG FLASHINF_REF="v0.2.8.rc1"

 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 # ARCH: Used for package suffixes (e.g., amd64, arm64)
diff --git a/container/deps/vllm/install_vllm.sh b/container/deps/vllm/install_vllm.sh
index f7cbdcf38f..f816363f38 100755
--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -167,14 +167,10 @@
 python setup.py install

 # Install Flash Infer
-if [ "$ARCH" = "arm64" ]; then
-    uv pip install flashinfer-python
-else
-    cd $INSTALLATION_DIR
-    git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
-    cd flashinfer
-    git checkout $FLASHINF_REF
-    python -m pip install -v .
-fi
+cd $INSTALLATION_DIR
+git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
+cd flashinfer
+git checkout $FLASHINF_REF
+uv pip install -v .

 echo "vllm installation completed successfully"
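
Usage note: FLASHINF_REF is an ordinary Docker build ARG, so the pinned FlashInfer ref can be overridden at image build time without editing the Dockerfile. A minimal sketch, assuming the repository root as the build context (the image tag below is illustrative, not from this repo):

    # Hypothetical invocation; tag name and context path are assumptions.
    docker build -f container/Dockerfile.vllm \
      --build-arg FLASHINF_REF="v0.2.8.rc1" \
      -t dynamo-vllm:dev .

Since install_vllm.sh now builds FlashInfer from source unconditionally, the same FLASHINF_REF pin applies to both x86_64 and arm64 builds.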