diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang
index bff39a2dfe..03f9bb6545 100644
--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -2,497 +2,87 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-# This section contains build arguments that are common and shared with
-# the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
-
-ARG BASE_IMAGE
-ARG BASE_IMAGE_TAG
-
-ARG FRAMEWORK_IMAGE
-ARG FRAMEWORK_IMAGE_TAG
-ARG PYTHON_VERSION
-ARG CUDA_VERSION
-
-ARG ARCH=amd64
-ARG ARCH_ALT=x86_64
-ARG CARGO_BUILD_JOBS
-
-# sccache configuration - inherit from base build
-ARG USE_SCCACHE
-ARG SCCACHE_BUCKET=""
-ARG SCCACHE_REGION=""
-
-ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
-FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
-
-########################################################
-########## Framework Development Image ################
-########################################################
-#
-# PURPOSE: Framework development and SGLang/DeepEP/NVSHMEM compilation
-#
-# This stage builds and compiles framework dependencies including:
-# - SGLang inference engine with CUDA support
-# - DeepEP and NVSHMEM
-# - All necessary build tools and compilation dependencies
-# - Framework-level Python packages and extensions
-#
-# Use this stage when you need to:
-# - Build SGLang from source with custom modifications
-# - Develop or debug framework-level components
-# - Create custom builds with specific optimization flags
-#
-#FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
-FROM ${FRAMEWORK_IMAGE}:${FRAMEWORK_IMAGE_TAG} AS framework
-
-# Declare all ARGs
-ARG BUILD_TYPE=all
-ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
-ARG DEEPEP_GB_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
-ARG CMAKE_BUILD_PARALLEL_LEVEL=2
-ARG SGL_KERNEL_VERSION=0.3.16.post5
-ARG SGLANG_COMMIT=0.5.4.post3
-ARG GDRCOPY_COMMIT=v2.4.4
-ARG NVSHMEM_VERSION=3.3.9
-ARG GRACE_BLACKWELL=false
-ARG ARCH
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-ARG USE_SCCACHE
-ARG SCCACHE_BUCKET
-ARG SCCACHE_REGION
+ARG SGLANG_IMAGE_TAG="v0.5.6.post1"
+# CUDA major version of the SGLang base image: either 12 or 13
+ARG CUDA_VERSION="12"
+ARG BRANCH_TYPE
+# PyPI version to install when BRANCH_TYPE is not set
+ARG DYNAMO_VERSION=""
 ARG CARGO_BUILD_JOBS
-ARG CUDA_VERSION
-
-# Set all environment variables
-ENV DEBIAN_FRONTEND=noninteractive \
-    TZ=America/Los_Angeles \
-    CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
-    NVSHMEM_DIR=/sgl-workspace/nvshmem/install \
-    PATH="${PATH}:/usr/local/nvidia/bin" \
-    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
-    LANG=en_US.UTF-8 \
-    LANGUAGE=en_US:en \
-    LC_ALL=en_US.UTF-8
-
-# Combined: Python setup, locale, and all package installation
-RUN apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa -y \
-    && apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    # Python (using other python versions as needed)
-    python${PYTHON_VERSION}-dev \
-    python${PYTHON_VERSION}-venv \
-    python${PYTHON_VERSION}-distutils \
-    python3-pip \
-    # Build essentials
-    build-essential \
-    cmake \
-    ninja-build \
-    ccache \
-    patchelf \
-    git \
-    git-lfs \
-    # Core system utilities
-    tzdata \
-    locales \
-    ca-certificates \
-    dkms \
-    kmod \
-    # Command line tools
-    wget \
-    curl \
-    jq \
-    unzip \
-    # Network utilities
-    netcat-openbsd \
-    # SSL and pkg-config
-    libssl-dev \
-    pkg-config \
-    # MPI and NUMA
-    libopenmpi-dev \
-    libnuma1 \
-    libnuma-dev \
-    numactl \
-    # InfiniBand/RDMA
-    libibverbs-dev \
-    libibverbs1 \
-    libibumad3 \
-    librdmacm1 \
-    libnl-3-200 \
-    libnl-route-3-200 \
-    libnl-route-3-dev \
-    libnl-3-dev \
-    ibverbs-providers \
-    infiniband-diags \
-    perftest \
-    # Development libraries
-    libgoogle-glog-dev \
-    libgtest-dev \
-    libjsoncpp-dev \
-    libunwind-dev \
-    libboost-all-dev \
-    libgrpc-dev \
-    libgrpc++-dev \
-    libprotobuf-dev \
-    protobuf-compiler \
-    protobuf-compiler-grpc \
-    pybind11-dev \
-    libhiredis-dev \
-    libcurl4-openssl-dev \
-    libczmq4 \
-    libczmq-dev \
-    libfabric-dev \
-    # Package building tools
-    devscripts \
-    debhelper \
-    fakeroot \
-    check \
-    libsubunit0 \
-    libsubunit-dev \
-    # Set Python alternatives
-    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
-    && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python /usr/bin/python${PYTHON_VERSION} \
-    # Set up locale
-    && locale-gen en_US.UTF-8 \
-    # Cleanup
-    && rm -rf /var/lib/apt/lists/* \
-    && apt-get clean
-
-# Install sccache if requested
-COPY container/use-sccache.sh /tmp/use-sccache.sh
-RUN if [ "$USE_SCCACHE" = "true" ]; then \
-    /tmp/use-sccache.sh install; \
-fi
-
-# Set environment variables - they'll be empty strings if USE_SCCACHE=false
-ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
-    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
-    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
-    RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
-    CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
-    CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
-    CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
+FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG}-cu${CUDA_VERSION}-runtime
 
 WORKDIR /sgl-workspace
 
-# GDRCopy installation
-RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdrcopy.git \
-    && cd gdrcopy/packages \
-    && export CUDA=${CUDA_HOME} \
-    && ./build-deb-packages.sh \
-    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb
-
-# Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
-
-# Create dynamo user EARLY - before copying files, with group 0 for OpenShift compatibility
-RUN userdel -r ubuntu > /dev/null 2>&1 || true \
-    && useradd -m -s /bin/bash -g 0 dynamo \
-    && [ `id -u dynamo` -eq 1000 ] \
-    && mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
-    && chown -R dynamo: /sgl-workspace /workspace /home/dynamo /opt/dynamo \
-    && chmod -R g+w /sgl-workspace /workspace /home/dynamo/.cache /opt/dynamo
-
-USER dynamo
-ENV HOME=/home/dynamo
-
-# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
-RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
-    && git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
-    && cd sglang \
-    && case "$CUDA_VERSION" in \
-        12.8.1) CUINDEX=128 ;; \
-        12.9.1) CUINDEX=129 ;; \
-        *) echo "Error: Unsupported CUDA version for sglang: $CUDA_VERSION (requires 12.8.1 or 12.9.1)" && exit 1 ;; \
-    esac \
-    && python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} \
-    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
-    && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
-    && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
+ARG BRANCH_TYPE
+ARG CARGO_BUILD_JOBS
 
-# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
-RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
-    curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && mv nvshmem_src nvshmem \
-    && rm -f /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && if [ "$GRACE_BLACKWELL" = true ]; then \
-        git clone --depth 1 https://github.com/fzyzcjy/DeepEP.git \
-        && cd DeepEP \
-        && git fetch --depth 1 origin ${DEEPEP_GB_COMMIT} \
-        && git checkout ${DEEPEP_GB_COMMIT}; \
+# Copy the local source tree when BRANCH_TYPE=local; clone the remote repo when BRANCH_TYPE=remote
+RUN --mount=type=bind,source=.,target=/mnt/local_src \
+    if [ "$BRANCH_TYPE" = "local" ]; then \
+        cp -r /mnt/local_src /sgl-workspace/dynamo; \
+    elif [ "$BRANCH_TYPE" = "remote" ]; then \
+        git clone https://github.com/ai-dynamo/dynamo.git /sgl-workspace/dynamo; \
+    fi
+
+ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
+ARG DYNAMO_VERSION
+
+# SGLang does not use a venv in its container, so install into the system Python
+# BRANCH_TYPE=local -> build from local repo with maturin
+# BRANCH_TYPE=remote -> build from github with maturin
+# otherwise -> pip install ai-dynamo (with optional version)
+RUN --mount=type=bind,source=.,target=/mnt/local_src \
+    if [ "$BRANCH_TYPE" = "local" ]; then \
+        cd dynamo/lib/bindings/python && \
+        pip install --break-system-packages maturin && \
+        maturin build --release && \
+        pip install --break-system-packages target/wheels/*.whl && \
+        cd /sgl-workspace/dynamo && \
+        pip install --break-system-packages -e . && \
+        pip install --break-system-packages --requirement /mnt/local_src/container/deps/requirements.txt ; \
+    elif [ "$BRANCH_TYPE" = "remote" ]; then \
+        cd dynamo/lib/bindings/python && \
+        pip install --break-system-packages maturin && \
+        maturin build --release && \
+        pip install --break-system-packages target/wheels/*.whl && \
+        cd /sgl-workspace/dynamo && \
+        pip install --break-system-packages -e . && \
+        pip install --break-system-packages --requirement /sgl-workspace/dynamo/container/deps/requirements.txt ; \
+    elif [ -n "$DYNAMO_VERSION" ]; then \
+        pip install --break-system-packages ai-dynamo==${DYNAMO_VERSION} ; \
     else \
-        git clone --depth 1 https://github.com/deepseek-ai/DeepEP.git \
-        && cd DeepEP \
-        && git fetch --depth 1 origin ${DEEPEP_COMMIT} \
-        && git checkout ${DEEPEP_COMMIT}; \
-    fi \
-    && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh
-
-# Build and install NVSHMEM library only (without python library)
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=OFF && \
-    cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
-    /tmp/use-sccache.sh show-stats "NVSHMEM"
-
-# Build nvshmem4py wheels separately (Python 3.10, CUDA 12) to avoid building the python library twice for multiple python versions
-# Need to reconfigure with PYTHON_LIB=ON to add the nvshmem4py subdirectory
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=ON && \
-    cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
-    /tmp/use-sccache.sh show-stats "NVSHMEM4PY"
-
-# Install DeepEP
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    cd /sgl-workspace/DeepEP && \
-    NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .
-
-# Copy rust installation from dynamo_base to avoid duplication efforts
-COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
-COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
-
-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    CARGO_TARGET_DIR=/workspace/target \
-    PATH=/usr/local/cargo/bin:$PATH \
-    CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-
-# Install essential Python build tools
-RUN python3 -m pip install --no-cache-dir \
-    mooncake-transfer-engine==0.3.6.post1 \
-    scikit-build-core==0.11.6 \
-    setuptools-rust==1.12.0
-
-##################################################
-########## Runtime Image ########################
-##################################################
-#
-# PURPOSE: Production runtime environment
-#
-# This stage creates a production-ready image containing:
-# - Pre-compiled SGLang, DeepEP, and NVSHMEM components
-# - Dynamo runtime libraries and Python packages
-# - Essential runtime dependencies and configurations
-# - Optimized for inference workloads and deployment
-#
-# Use this stage when you need:
-# - Production deployment of Dynamo with SGLang + DeepEP
-# - Minimal runtime footprint without build tools
-# - Ready-to-run inference server environment
-#
-FROM framework AS runtime
-
-WORKDIR /workspace
-
-ARG ARCH
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-
-ENV DYNAMO_HOME=/opt/dynamo
-ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins
-ENV LD_LIBRARY_PATH=\
-${NVSHMEM_DIR}/lib:\
-${NIXL_LIB_DIR}:\
-${NIXL_PLUGIN_DIR}:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-/usr/local/nvidia/lib64:\
-${LD_LIBRARY_PATH}
-
-# Copy NATS and ETCD from dynamo_base, and UCX/NIXL
-COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
-COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
-COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
-ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
-
-# Install Dynamo wheels from dynamo_base wheelhouse
-COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
-COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
-RUN python3 -m pip install \
-    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
-    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
-    /opt/dynamo/wheelhouse/nixl/nixl*.whl \
-    && cd /opt/dynamo/benchmarks \
-    && python3 -m pip install --no-cache . \
-    && cd - \
-    && rm -rf /opt/dynamo/benchmarks
-
-# Install common and test dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    python3 -m pip install \
-    --no-cache \
-    --requirement /tmp/requirements.txt \
-    --requirement /tmp/requirements.test.txt
-
-## Copy attribution files and launch banner with correct ownership
-COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
-
-# Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
-    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
-
-# Setup environment for all users
-USER root
-RUN chmod 755 /opt/dynamo/.launch_screen && \
-    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
-    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
-
-USER dynamo
-
-# Copy tests, benchmarks, deploy and components for CI with correct ownership
-COPY --chown=dynamo: tests /workspace/tests
-COPY --chown=dynamo: examples /workspace/examples
-COPY --chown=dynamo: benchmarks /workspace/benchmarks
-COPY --chown=dynamo: deploy /workspace/deploy
-COPY --chown=dynamo: components/ /workspace/components/
-COPY --chown=dynamo: recipes/ /workspace/recipes/
-
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
-
-###########################################################
-########## Development (run.sh, runs as root user) ########
-###########################################################
-#
-# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
-#
-# This stage runs as root and provides:
-# - Development tools and utilities for local debugging
-# - Support for vscode/cursor development outside the Dev Container plug-in
-#
-# Use this stage if you need a full-featured development environment with extra tools,
-# but do not use it with the Dev Container plug-in.
-
-FROM runtime AS dev
-
-ARG WORKSPACE_DIR=/sgl-workspace/dynamo
-ARG PYTHON_VERSION
-
-# NOTE: SGLang uses system Python (not a virtualenv in framework/runtime stages) to align with
-# upstream SGLang Dockerfile: https://github.com/sgl-project/sglang/blob/main/docker/Dockerfile
-# For dev stage, we create a lightweight venv with --system-site-packages to satisfy maturin develop
-# requirements while still accessing all system-installed packages (sglang, torch, deepep, etc.)
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-RUN mkdir -p /opt/dynamo/venv && \
-    uv venv /opt/dynamo/venv --python $PYTHON_VERSION --system-site-packages
-
-ENV VIRTUAL_ENV=/opt/dynamo/venv \
-    PATH="/opt/dynamo/venv/bin:${PATH}"
-
-USER root
-# Install development tools and utilities
-RUN apt-get update -y && \
-    apt-get install -y --no-install-recommends \
-    # System monitoring and debugging tools
-    nvtop \
-    htop \
-    gdb \
-    # Network and system utilities
-    wget \
-    iproute2 \
-    net-tools \
-    openssh-client \
-    rsync \
-    lsof \
-    # File and archive utilities
-    zip \
-    tree \
-    # Development and build tools
-    vim \
-    tmux \
-    git \
-    git-lfs \
-    autoconf \
-    automake \
-    cmake \
-    libtool \
-    meson \
-    bear \
-    ccache \
-    less \
-    # Language and development support
-    clang \
-    libclang-dev \
-    # Shell and productivity tools
-    zsh \
-    silversearcher-ag \
-    cloc \
-    locales \
-    # sudo for dev stage
-    sudo \
-    # NVIDIA tools dependencies
-    gnupg && \
-    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \
-    apt-get update -y && \
-    apt-get install -y nsight-systems-cli && \
-    rm -rf /var/lib/apt/lists/*
-
-# Install clang-format and clangd
-RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
-    && chmod +x /usr/local/bin/clang-format \
-    && curl --retry 3 --retry-delay 2 -L https://github.com/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \
-    && unzip clangd.zip \
-    && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \
-    && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \
-    && rm -rf clangd_18.1.3 clangd.zip
-
-# Editable install of dynamo
-COPY pyproject.toml README.md hatch_build.py /workspace/
-RUN python3 -m pip install --no-deps -e .
-
-# Install Python development packages
-RUN python3 -m pip install --no-cache-dir \
-    maturin[patchelf] \
-    pytest \
-    black \
-    isort \
-    icdiff \
-    scikit_build_core \
-    uv \
-    pre-commit \
-    pandas \
-    matplotlib \
-    tabulate
-
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
+        pip install --break-system-packages ai-dynamo ; \
+    fi
+
+# Install NATS and ETCD
+RUN case "$(uname -m)" in \
+        x86_64) ARCH=amd64 ;; \
+        aarch64) ARCH=arm64 ;; \
+        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
+    esac && \
+    wget --tries=3 --waitretry=5 \
+        https://github.com/nats-io/nats-server/releases/download/v2.10.28/\
+nats-server-v2.10.28-${ARCH}.deb && \
+    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
+
+ENV ETCD_VERSION="v3.5.21"
+RUN case "$(uname -m)" in \
+        x86_64) ARCH=amd64 ;; \
+        aarch64) ARCH=arm64 ;; \
+        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
+    esac && \
+    wget --tries=3 --waitretry=5 \
+        https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/\
+etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
+    mkdir -p /usr/local/bin/etcd && \
+    tar -xzf /tmp/etcd.tar.gz \
+        -C /usr/local/bin/etcd --strip-components=1 && \
+    rm /tmp/etcd.tar.gz
+
+ENV PATH=/usr/local/bin/etcd:$PATH
+
+# Enable forceful shutdown of inflight requests
+ENV SGLANG_FORCE_SHUTDOWN=1
+
+WORKDIR /sgl-workspace/dynamo/examples/backends/sglang
diff --git a/container/Dockerfile.sglang-wideep b/container/Dockerfile.sglang-wideep
deleted file mode 100644
index 7d6ce56152..0000000000
--- a/container/Dockerfile.sglang-wideep
+++ /dev/null
@@ -1,93 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-ARG SGLANG_IMAGE_TAG="v0.5.3.post2"
-ARG BRANCH_TYPE
-ARG CARGO_BUILD_JOBS
-
-FROM scratch AS local_src
-COPY . /src
-
-FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG}
-
-WORKDIR /sgl-workspace
-
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-
-# Install jq for JSON processing
-RUN apt-get update -y \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    jq \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install dynamo
-# Providing --build-arg BRANCH_TYPE=local will editable install the local dynamo repo
-# Providing --build-arg BRANCH_TYPE=remote will editable install the remote dynamo repo
-# Default is to install the latest published dynamo version
-ARG BRANCH_TYPE
-ARG CARGO_BUILD_JOBS
-
-COPY --from=local_src /src /tmp/local_src
-RUN if [ "$BRANCH_TYPE" = "local" ]; then \
-        cp -r /tmp/local_src /sgl-workspace/dynamo; \
-    elif [ "$BRANCH_TYPE" = "remote" ]; then \
-        git clone https://github.com/ai-dynamo/dynamo.git /sgl-workspace/dynamo; \
-    fi
-
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-
-# SGLang does not use a venv in their container
-RUN if [ "$BRANCH_TYPE" = "local" ]; then \
-        cd dynamo/lib/bindings/python && \
-        pip install --break-system-packages maturin && \
-        maturin build --release && \
-        pip install --break-system-packages target/wheels/*.whl && \
-        cd /sgl-workspace/dynamo && \
-        pip install --break-system-packages -e . && \
-        pip install --break-system-packages --requirement /tmp/local_src/container/deps/requirements.txt ; \
-    elif [ "$BRANCH_TYPE" = "remote" ]; then \
-        cd dynamo/lib/bindings/python && \
-        pip install --break-system-packages maturin && \
-        maturin build --release && \
-        pip install --break-system-packages target/wheels/*.whl && \
-        cd /sgl-workspace/dynamo && \
-        pip install --break-system-packages -e . && \
-        pip install --break-system-packages --requirement /sgl-workspace/dynamo/container/deps/requirements.txt ; \
-    else \
-        pip install --break-system-packages ai-dynamo ; \
-    fi \
-&& rm -rf /tmp/local_src
-
-# Install NATS and ETCD
-RUN case "$(uname -m)" in \
-        x86_64) ARCH=amd64 ;; \
-        aarch64) ARCH=arm64 ;; \
-        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
-    esac && \
-    wget --tries=3 --waitretry=5 \
-        https://github.com/nats-io/nats-server/releases/download/v2.10.28/\
-nats-server-v2.10.28-${ARCH}.deb && \
-    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
-
-ENV ETCD_VERSION="v3.5.21"
-RUN case "$(uname -m)" in \
-        x86_64) ARCH=amd64 ;; \
-        aarch64) ARCH=arm64 ;; \
-        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
-    esac && \
-    wget --tries=3 --waitretry=5 \
-        https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/\
-etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
-    mkdir -p /usr/local/bin/etcd && \
-    tar -xzf /tmp/etcd.tar.gz \
-        -C /usr/local/bin/etcd --strip-components=1 && \
-    rm /tmp/etcd.tar.gz
-
-ENV PATH=/usr/local/bin/etcd:$PATH
-
-# Enable forceful shutdown of inflight requests
-ENV SGL_FORCE_SHUTDOWN=1
-
-WORKDIR /sgl-workspace/dynamo/examples/backends/sglang
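
Usage note (not part of the diff): a minimal sketch of how the consolidated Dockerfile above can be built, assuming the build context is the repository root (required by the RUN --mount=type=bind,source=. steps) and that BuildKit is enabled. The build args come from the new Dockerfile; the image tags and the <version> placeholder are illustrative, not defined by this PR.

# Editable install from the local checkout (BRANCH_TYPE=local)
DOCKER_BUILDKIT=1 docker build \
    -f container/Dockerfile.sglang \
    --build-arg SGLANG_IMAGE_TAG=v0.5.6.post1 \
    --build-arg CUDA_VERSION=12 \
    --build-arg BRANCH_TYPE=local \
    -t dynamo-sglang:dev .

# Pinned release from PyPI (BRANCH_TYPE unset); replace <version> with an
# ai-dynamo release, or omit DYNAMO_VERSION entirely to get the latest.
DOCKER_BUILDKIT=1 docker build \
    -f container/Dockerfile.sglang \
    --build-arg DYNAMO_VERSION=<version> \
    -t dynamo-sglang:release .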