# Patch summary (commentary only; text ahead of the first "diff --git" header is
# not part of any hunk).
#
# Files touched by this patch:
#   * README.md
#       - SGLang install snippet: the "# Install libnuma" comment becomes
#         "# Install libnuma-dev", and a new step installs
#         "flashinfer-python==0.2.9rc2" with --prerelease=allow before the
#         existing `uv pip install ai-dynamo[sglang]` line.
#   * container/Dockerfile.sglang
#       - Removes both `ENV PYTHONPATH=...` instructions (the one ahead of the
#         "Development Image" banner and the one ahead of ENTRYPOINT).
#       - Replaces the single `uv pip install ai-dynamo[sglang] --find-links
#         wheelhouse` RUN with one RUN that first installs the pinned
#         flashinfer-python pre-release and then installs "ai-dynamo[sglang]"
#         (now quoted) from the wheelhouse.
#   * container/Dockerfile.sglang-wideep
#       - `pip install --break-system-packages -e .` loses the `-e` flag, i.e.
#         the project root is no longer installed in editable mode. The
#         editable install of lib/bindings/python on the preceding line is
#         unchanged context.
#
# NOTE(review): the hunks below appear to have had their line breaks collapsed
# into single spaces (each file's header, hunk markers and +/- lines all share
# one physical line). As stored, this presumably will not apply with
# `git apply` / `patch`; regenerate it from the originating commit rather than
# re-wrapping it by hand, because blank context lines cannot be recovered from
# the flattened text -- TODO confirm against the source commit.
# NOTE(review): the final `ENTRYPOINT [...]` context line is split across two
# physical lines here; it was presumably a single line in the original diff.
diff --git a/README.md b/README.md index 759a9187d8..6a8fa61890 100644 --- a/README.md +++ b/README.md @@ -167,10 +167,15 @@ To specify which GPUs to use set environment variable `CUDA_VISIBLE_DEVICES`. ## SGLang + ``` -# Install libnuma +# Install libnuma-dev apt install -y libnuma-dev +# Install flashinfer-python pre-release (required by sglang for optimized inference) +uv pip install "flashinfer-python==0.2.9rc2" --prerelease=allow + +# Install ai-dynamo with sglang support uv pip install ai-dynamo[sglang] ``` diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang index 8557684096..620cad6a3e 100644 --- a/container/Dockerfile.sglang +++ b/container/Dockerfile.sglang @@ -378,8 +378,6 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ echo "cat ~/.launch_screen" >> ~/.bashrc -ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH - ######################################## ########## Development Image ########### ######################################## @@ -446,7 +444,10 @@ RUN apt-get update && \ COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/ COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/ -RUN uv pip install ai-dynamo[sglang] --find-links wheelhouse + +# Install flashinfer-python pre-release version separately, then install ai-dynamo with sglang support +RUN uv pip install "flashinfer-python==0.2.9rc2" --prerelease=allow && \ + uv pip install "ai-dynamo[sglang]" --find-links wheelhouse # Copy launch banner RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ @@ -466,7 +467,5 @@ RUN uv pip install /workspace/benchmarks # Copy attribution files COPY ATTRIBUTION* LICENSE /workspace/ -ENV PYTHONPATH=/workspace/examples/sglang/utils:$PYTHONPATH - ENTRYPOINT 
["/opt/nvidia/nvidia_entrypoint.sh"] CMD [] diff --git a/container/Dockerfile.sglang-wideep b/container/Dockerfile.sglang-wideep index 0bbcb3af23..c8746ceb9d 100644 --- a/container/Dockerfile.sglang-wideep +++ b/container/Dockerfile.sglang-wideep @@ -121,7 +121,7 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} RUN cargo build --release RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../.. -RUN pip install --break-system-packages -e . +RUN pip install --break-system-packages . RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \ dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb