diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang
index bff39a2dfe..03f9bb6545 100644
--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -2,497 +2,87 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-# This section contains build arguments that are common and shared with
-# the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
-
-ARG BASE_IMAGE
-ARG BASE_IMAGE_TAG
-
-ARG FRAMEWORK_IMAGE
-ARG FRAMEWORK_IMAGE_TAG
-ARG PYTHON_VERSION
-ARG CUDA_VERSION
-
-ARG ARCH=amd64
-ARG ARCH_ALT=x86_64
-ARG CARGO_BUILD_JOBS
-
-# sccache configuration - inherit from base build
-ARG USE_SCCACHE
-ARG SCCACHE_BUCKET=""
-ARG SCCACHE_REGION=""
-
-ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
-FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
-
-########################################################
-########## Framework Development Image ################
-########################################################
-#
-# PURPOSE: Framework development and SGLang/DeepEP/NVSHMEM compilation
-#
-# This stage builds and compiles framework dependencies including:
-# - SGLang inference engine with CUDA support
-# - DeepEP and NVSHMEM
-# - All necessary build tools and compilation dependencies
-# - Framework-level Python packages and extensions
-#
-# Use this stage when you need to:
-# - Build SGLang from source with custom modifications
-# - Develop or debug framework-level components
-# - Create custom builds with specific optimization flags
-#
-#FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
-FROM ${FRAMEWORK_IMAGE}:${FRAMEWORK_IMAGE_TAG} AS framework
-
-# Declare all ARGs
-ARG BUILD_TYPE=all
-ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
-ARG DEEPEP_GB_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
-ARG CMAKE_BUILD_PARALLEL_LEVEL=2
-ARG SGL_KERNEL_VERSION=0.3.16.post5
-ARG SGLANG_COMMIT=0.5.4.post3
-ARG GDRCOPY_COMMIT=v2.4.4
-ARG NVSHMEM_VERSION=3.3.9
-ARG GRACE_BLACKWELL=false
-ARG ARCH
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-ARG USE_SCCACHE
-ARG SCCACHE_BUCKET
-ARG SCCACHE_REGION
+ARG SGLANG_IMAGE_TAG="v0.5.6.post1"
+# CUDA major version of the SGLang base image: either 12 or 13
+ARG CUDA_VERSION="12"
+ARG BRANCH_TYPE
+# PyPI version to install when BRANCH_TYPE is not set
+ARG DYNAMO_VERSION=""
 ARG CARGO_BUILD_JOBS
-ARG CUDA_VERSION
-
-# Set all environment variables
-ENV DEBIAN_FRONTEND=noninteractive \
-    TZ=America/Los_Angeles \
-    CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
-    NVSHMEM_DIR=/sgl-workspace/nvshmem/install \
-    PATH="${PATH}:/usr/local/nvidia/bin" \
-    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
-    LANG=en_US.UTF-8 \
-    LANGUAGE=en_US:en \
-    LC_ALL=en_US.UTF-8
-
-# Combined: Python setup, locale, and all package installation
-RUN apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa -y \
-    && apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    # Python (using other python versions as needed)
-    python${PYTHON_VERSION}-dev \
-    python${PYTHON_VERSION}-venv \
-    python${PYTHON_VERSION}-distutils \
-    python3-pip \
-    # Build essentials
-    build-essential \
-    cmake \
-    ninja-build \
-    ccache \
-    patchelf \
-    git \
-    git-lfs \
-    # Core system utilities
-    tzdata \
-    locales \
-    ca-certificates \
-    dkms \
-    kmod \
-    # Command line tools
-    wget \
-    curl \
-    jq \
-    unzip \
-    # Network utilities
-    netcat-openbsd \
-    # SSL and pkg-config
-    libssl-dev \
-    pkg-config \
-    # MPI and NUMA
-    libopenmpi-dev \
-    libnuma1 \
-    libnuma-dev \
-    numactl \
-    # InfiniBand/RDMA
-    libibverbs-dev \
-    libibverbs1 \
-    libibumad3 \
-    librdmacm1 \
-    libnl-3-200 \
-    libnl-route-3-200 \
-    libnl-route-3-dev \
-    libnl-3-dev \
-    ibverbs-providers \
-    infiniband-diags \
-    perftest \
-    # Development libraries
-    libgoogle-glog-dev \
-    libgtest-dev \
-    libjsoncpp-dev \
-    libunwind-dev \
-    libboost-all-dev \
-    libgrpc-dev \
-    libgrpc++-dev \
-    libprotobuf-dev \
-    protobuf-compiler \
-    protobuf-compiler-grpc \
-    pybind11-dev \
-    libhiredis-dev \
-    libcurl4-openssl-dev \
-    libczmq4 \
-    libczmq-dev \
-    libfabric-dev \
-    # Package building tools
-    devscripts \
-    debhelper \
-    fakeroot \
-    check \
-    libsubunit0 \
-    libsubunit-dev \
-    # Set Python alternatives
-    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
-    && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python /usr/bin/python${PYTHON_VERSION} \
-    # Set up locale
-    && locale-gen en_US.UTF-8 \
-    # Cleanup
-    && rm -rf /var/lib/apt/lists/* \
-    && apt-get clean
-
-# Install sccache if requested
-COPY container/use-sccache.sh /tmp/use-sccache.sh
-RUN if [ "$USE_SCCACHE" = "true" ]; then \
-    /tmp/use-sccache.sh install; \
-fi
-
-# Set environment variables - they'll be empty strings if USE_SCCACHE=false
-ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
-    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
-    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
-    RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
-    CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
-    CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
-    CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
+FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG}-cu${CUDA_VERSION}-runtime
 
 WORKDIR /sgl-workspace
 
-# GDRCopy installation
-RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdrcopy.git \
-    && cd gdrcopy/packages \
-    && export CUDA=${CUDA_HOME} \
-    && ./build-deb-packages.sh \
-    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb
-
-# Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
-
-# Create dynamo user EARLY - before copying files, with group 0 for OpenShift compatibility
-RUN userdel -r ubuntu > /dev/null 2>&1 || true \
-    && useradd -m -s /bin/bash -g 0 dynamo \
-    && [ `id -u dynamo` -eq 1000 ] \
-    && mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
-    && chown -R dynamo: /sgl-workspace /workspace /home/dynamo /opt/dynamo \
-    && chmod -R g+w /sgl-workspace /workspace /home/dynamo/.cache /opt/dynamo
-
-USER dynamo
-ENV HOME=/home/dynamo
-
-# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
-RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
-    && git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
-    && cd sglang \
-    && case "$CUDA_VERSION" in \
-        12.8.1) CUINDEX=128 ;; \
-        12.9.1) CUINDEX=129 ;; \
-        *) echo "Error: Unsupported CUDA version for sglang: $CUDA_VERSION (requires 12.8.1 or 12.9.1)" && exit 1 ;; \
-    esac \
-    && python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} \
-    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
-    && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
-    && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
+ARG BRANCH_TYPE
+ARG CARGO_BUILD_JOBS
 
-# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
-RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
-    curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && mv nvshmem_src nvshmem \
-    && rm -f /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && if [ "$GRACE_BLACKWELL" = true ]; then \
-        git clone --depth 1 https://github.com/fzyzcjy/DeepEP.git \
-        && cd DeepEP \
-        && git fetch --depth 1 origin ${DEEPEP_GB_COMMIT} \
-        && git checkout ${DEEPEP_GB_COMMIT}; \
+# Copy the local source tree when BRANCH_TYPE=local; clone the remote repo when BRANCH_TYPE=remote
+RUN --mount=type=bind,source=.,target=/mnt/local_src \
+    if [ "$BRANCH_TYPE" = "local" ]; then \
+        cp -r /mnt/local_src /sgl-workspace/dynamo; \
+    elif [ "$BRANCH_TYPE" = "remote" ]; then \
+        git clone https://github.com/ai-dynamo/dynamo.git /sgl-workspace/dynamo; \
+    fi
+
+ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
+ARG DYNAMO_VERSION
+
+# SGLang does not use a venv in its container, so install into the system Python
+# BRANCH_TYPE=local -> build from local repo with maturin
+# BRANCH_TYPE=remote -> build from github with maturin
+# otherwise -> pip install ai-dynamo (with optional version)
+RUN --mount=type=bind,source=.,target=/mnt/local_src \
+    if [ "$BRANCH_TYPE" = "local" ]; then \
+        cd dynamo/lib/bindings/python && \
+        pip install --break-system-packages maturin && \
+        maturin build --release && \
+        pip install --break-system-packages target/wheels/*.whl && \
+        cd /sgl-workspace/dynamo && \
+        pip install --break-system-packages -e . && \
+        pip install --break-system-packages --requirement /mnt/local_src/container/deps/requirements.txt ; \
+    elif [ "$BRANCH_TYPE" = "remote" ]; then \
+        cd dynamo/lib/bindings/python && \
+        pip install --break-system-packages maturin && \
+        maturin build --release && \
+        pip install --break-system-packages target/wheels/*.whl && \
+        cd /sgl-workspace/dynamo && \
+        pip install --break-system-packages -e . && \
+        pip install --break-system-packages --requirement /sgl-workspace/dynamo/container/deps/requirements.txt ; \
+    elif [ -n "$DYNAMO_VERSION" ]; then \
+        pip install --break-system-packages ai-dynamo==${DYNAMO_VERSION} ; \
     else \
-        git clone --depth 1 https://github.com/deepseek-ai/DeepEP.git \
-        && cd DeepEP \
-        && git fetch --depth 1 origin ${DEEPEP_COMMIT} \
-        && git checkout ${DEEPEP_COMMIT}; \
-    fi \
-    && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh
-
-# Build and install NVSHMEM library only (without python library)
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=OFF && \
-    cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
-    /tmp/use-sccache.sh show-stats "NVSHMEM"
-
-# Build nvshmem4py wheels separately (Python 3.10, CUDA 12) to avoid building the python library twice for multiple python versions
-# Need to reconfigure with PYTHON_LIB=ON to add the nvshmem4py subdirectory
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=ON && \
-    cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
-    /tmp/use-sccache.sh show-stats "NVSHMEM4PY"
-
-# Install DeepEP
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    cd /sgl-workspace/DeepEP && \
-    NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .
-
-# Copy rust installation from dynamo_base to avoid duplication efforts
-COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
-COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
-
-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    CARGO_TARGET_DIR=/workspace/target \
-    PATH=/usr/local/cargo/bin:$PATH \
-    CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-
-# Install essential Python build tools
-RUN python3 -m pip install --no-cache-dir \
-    mooncake-transfer-engine==0.3.6.post1 \
-    scikit-build-core==0.11.6 \
-    setuptools-rust==1.12.0
-
-##################################################
-########## Runtime Image ########################
-##################################################
-#
-# PURPOSE: Production runtime environment
-#
-# This stage creates a production-ready image containing:
-# - Pre-compiled SGLang, DeepEP, and NVSHMEM components
-# - Dynamo runtime libraries and Python packages
-# - Essential runtime dependencies and configurations
-# - Optimized for inference workloads and deployment
-#
-# Use this stage when you need:
-# - Production deployment of Dynamo with SGLang + DeepEP
-# - Minimal runtime footprint without build tools
-# - Ready-to-run inference server environment
-#
-FROM framework AS runtime
-
-WORKDIR /workspace
-
-ARG ARCH
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-
-ENV DYNAMO_HOME=/opt/dynamo
-ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins
-ENV LD_LIBRARY_PATH=\
-${NVSHMEM_DIR}/lib:\
-${NIXL_LIB_DIR}:\
-${NIXL_PLUGIN_DIR}:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-/usr/local/nvidia/lib64:\
-${LD_LIBRARY_PATH}
-
-# Copy NATS and ETCD from dynamo_base, and UCX/NIXL
-COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
-COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
-COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
-ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
-
-# Install Dynamo wheels from dynamo_base wheelhouse
-COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
-COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
-RUN python3 -m pip install \
-    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
-    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
-    /opt/dynamo/wheelhouse/nixl/nixl*.whl \
-    && cd /opt/dynamo/benchmarks \
-    && python3 -m pip install --no-cache . \
-    && cd - \
-    && rm -rf /opt/dynamo/benchmarks
-
-# Install common and test dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    python3 -m pip install \
-    --no-cache \
-    --requirement /tmp/requirements.txt \
-    --requirement /tmp/requirements.test.txt
-
-## Copy attribution files and launch banner with correct ownership
-COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
-
-# Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
-    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
-
-# Setup environment for all users
-USER root
-RUN chmod 755 /opt/dynamo/.launch_screen && \
-    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
-    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
-
-USER dynamo
-
-# Copy tests, benchmarks, deploy and components for CI with correct ownership
-COPY --chown=dynamo: tests /workspace/tests
-COPY --chown=dynamo: examples /workspace/examples
-COPY --chown=dynamo: benchmarks /workspace/benchmarks
-COPY --chown=dynamo: deploy /workspace/deploy
-COPY --chown=dynamo: components/ /workspace/components/
-COPY --chown=dynamo: recipes/ /workspace/recipes/
-
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
-
-###########################################################
-########## Development (run.sh, runs as root user) ########
-###########################################################
-#
-# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
-#
-# This stage runs as root and provides:
-# - Development tools and utilities for local debugging
-# - Support for vscode/cursor development outside the Dev Container plug-in
-#
-# Use this stage if you need a full-featured development environment with extra tools,
-# but do not use it with the Dev Container plug-in.
-
-FROM runtime AS dev
-
-ARG WORKSPACE_DIR=/sgl-workspace/dynamo
-ARG PYTHON_VERSION
-
-# NOTE: SGLang uses system Python (not a virtualenv in framework/runtime stages) to align with
-# upstream SGLang Dockerfile: https://github.com/sgl-project/sglang/blob/main/docker/Dockerfile
-# For dev stage, we create a lightweight venv with --system-site-packages to satisfy maturin develop
-# requirements while still accessing all system-installed packages (sglang, torch, deepep, etc.)
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-RUN mkdir -p /opt/dynamo/venv && \
-    uv venv /opt/dynamo/venv --python $PYTHON_VERSION --system-site-packages
-
-ENV VIRTUAL_ENV=/opt/dynamo/venv \
-    PATH="/opt/dynamo/venv/bin:${PATH}"
-
-USER root
-# Install development tools and utilities
-RUN apt-get update -y && \
-    apt-get install -y --no-install-recommends \
-    # System monitoring and debugging tools
-    nvtop \
-    htop \
-    gdb \
-    # Network and system utilities
-    wget \
-    iproute2 \
-    net-tools \
-    openssh-client \
-    rsync \
-    lsof \
-    # File and archive utilities
-    zip \
-    tree \
-    # Development and build tools
-    vim \
-    tmux \
-    git \
-    git-lfs \
-    autoconf \
-    automake \
-    cmake \
-    libtool \
-    meson \
-    bear \
-    ccache \
-    less \
-    # Language and development support
-    clang \
-    libclang-dev \
-    # Shell and productivity tools
-    zsh \
-    silversearcher-ag \
-    cloc \
-    locales \
-    # sudo for dev stage
-    sudo \
-    # NVIDIA tools dependencies
-    gnupg && \
-    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \
-    apt-get update -y && \
-    apt-get install -y nsight-systems-cli && \
-    rm -rf /var/lib/apt/lists/*
-
-# Install clang-format and clangd
-RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
-    && chmod +x /usr/local/bin/clang-format \
-    && curl --retry 3 --retry-delay 2 -L https://github.com/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \
-    && unzip clangd.zip \
-    && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \
-    && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \
-    && rm -rf clangd_18.1.3 clangd.zip
-
-# Editable install of dynamo
-COPY pyproject.toml README.md hatch_build.py /workspace/
-RUN python3 -m pip install --no-deps -e .
-
-# Install Python development packages
-RUN python3 -m pip install --no-cache-dir \
-    maturin[patchelf] \
-    pytest \
-    black \
-    isort \
-    icdiff \
-    scikit_build_core \
-    uv \
-    pre-commit \
-    pandas \
-    matplotlib \
-    tabulate
-
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
+        pip install --break-system-packages ai-dynamo ; \
+    fi
+
+# Install NATS and ETCD
+RUN case "$(uname -m)" in \
+        x86_64) ARCH=amd64 ;; \
+        aarch64) ARCH=arm64 ;; \
+        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
+    esac && \
+    wget --tries=3 --waitretry=5 \
+        https://github.com/nats-io/nats-server/releases/download/v2.10.28/\
+nats-server-v2.10.28-${ARCH}.deb && \
+    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
+
+ENV ETCD_VERSION="v3.5.21"
+RUN case "$(uname -m)" in \
+        x86_64) ARCH=amd64 ;; \
+        aarch64) ARCH=arm64 ;; \
+        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
+    esac && \
+    wget --tries=3 --waitretry=5 \
+        https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/\
+etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
+    mkdir -p /usr/local/bin/etcd && \
+    tar -xzf /tmp/etcd.tar.gz \
+        -C /usr/local/bin/etcd --strip-components=1 && \
+    rm /tmp/etcd.tar.gz
+
+ENV PATH=/usr/local/bin/etcd:$PATH
+
+# Enable forceful shutdown of inflight requests
+ENV SGLANG_FORCE_SHUTDOWN=1
+
+WORKDIR /sgl-workspace/dynamo/examples/backends/sglang
diff --git a/container/Dockerfile.sglang-wideep b/container/Dockerfile.sglang-wideep
deleted file mode 100644
index 7d6ce56152..0000000000
--- a/container/Dockerfile.sglang-wideep
+++ /dev/null
@@ -1,93 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-ARG SGLANG_IMAGE_TAG="v0.5.3.post2"
-ARG BRANCH_TYPE
-ARG CARGO_BUILD_JOBS
-
-FROM scratch AS local_src
-COPY . /src
-
-FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG}
-
-WORKDIR /sgl-workspace
-
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-
-# Install jq for JSON processing
-RUN apt-get update -y \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    jq \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install dynamo
-# Providing --build-arg BRANCH_TYPE=local will editable install the local dynamo repo
-# Providing --build-arg BRANCH_TYPE=remote will editable install the remote dynamo repo
-# Default is to install the latest published dynamo version
-ARG BRANCH_TYPE
-ARG CARGO_BUILD_JOBS
-
-COPY --from=local_src /src /tmp/local_src
-RUN if [ "$BRANCH_TYPE" = "local" ]; then \
-        cp -r /tmp/local_src /sgl-workspace/dynamo; \
-    elif [ "$BRANCH_TYPE" = "remote" ]; then \
-        git clone https://github.com/ai-dynamo/dynamo.git /sgl-workspace/dynamo; \
-    fi
-
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-
-# SGLang does not use a venv in their container
-RUN if [ "$BRANCH_TYPE" = "local" ]; then \
-        cd dynamo/lib/bindings/python && \
-        pip install --break-system-packages maturin && \
-        maturin build --release && \
-        pip install --break-system-packages target/wheels/*.whl && \
-        cd /sgl-workspace/dynamo && \
-        pip install --break-system-packages -e . && \
-        pip install --break-system-packages --requirement /tmp/local_src/container/deps/requirements.txt ; \
-    elif [ "$BRANCH_TYPE" = "remote" ]; then \
-        cd dynamo/lib/bindings/python && \
-        pip install --break-system-packages maturin && \
-        maturin build --release && \
-        pip install --break-system-packages target/wheels/*.whl && \
-        cd /sgl-workspace/dynamo && \
-        pip install --break-system-packages -e . && \
-        pip install --break-system-packages --requirement /sgl-workspace/dynamo/container/deps/requirements.txt ; \
-    else \
-        pip install --break-system-packages ai-dynamo ; \
-    fi \
-&& rm -rf /tmp/local_src
-
-# Install NATS and ETCD
-RUN case "$(uname -m)" in \
-        x86_64) ARCH=amd64 ;; \
-        aarch64) ARCH=arm64 ;; \
-        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
-    esac && \
-    wget --tries=3 --waitretry=5 \
-        https://github.com/nats-io/nats-server/releases/download/v2.10.28/\
-nats-server-v2.10.28-${ARCH}.deb && \
-    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
-
-ENV ETCD_VERSION="v3.5.21"
-RUN case "$(uname -m)" in \
-        x86_64) ARCH=amd64 ;; \
-        aarch64) ARCH=arm64 ;; \
-        *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
-    esac && \
-    wget --tries=3 --waitretry=5 \
-        https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/\
-etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
-    mkdir -p /usr/local/bin/etcd && \
-    tar -xzf /tmp/etcd.tar.gz \
-        -C /usr/local/bin/etcd --strip-components=1 && \
-    rm /tmp/etcd.tar.gz
-
-ENV PATH=/usr/local/bin/etcd:$PATH
-
-# Enable forceful shutdown of inflight requests
-ENV SGL_FORCE_SHUTDOWN=1
-
-WORKDIR /sgl-workspace/dynamo/examples/backends/sglang
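
Usage note (not part of the diff): a minimal sketch of how the consolidated Dockerfile above can be built, assuming the build context is the repository root (required by the RUN --mount=type=bind,source=. steps) and that BuildKit is enabled. The build args come from the new Dockerfile; the image tags and the <version> placeholder are illustrative, not defined by this PR.

# Editable install from the local checkout (BRANCH_TYPE=local)
DOCKER_BUILDKIT=1 docker build \
    -f container/Dockerfile.sglang \
    --build-arg SGLANG_IMAGE_TAG=v0.5.6.post1 \
    --build-arg CUDA_VERSION=12 \
    --build-arg BRANCH_TYPE=local \
    -t dynamo-sglang:dev .

# Pinned release from PyPI (BRANCH_TYPE unset); replace <version> with an
# ai-dynamo release, or omit DYNAMO_VERSION entirely to get the latest.
DOCKER_BUILDKIT=1 docker build \
    -f container/Dockerfile.sglang \
    --build-arg DYNAMO_VERSION=<version> \
    -t dynamo-sglang:release .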