fix: merge conflicts

ai-dynamo · hhzhang16 · Jul 15, 2025 · Jul 11, 2025 · Jul 12, 2025 · Jul 14, 2025
commit f3dd01a80d18bfb66b773ff60e39e5e855a6c378
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/components/ingress/README b/components/ingress/README
@@ -0,0 +1,9 @@
+# Dynamo ingress / frontend node.
+
+Usage: `python -m dynamo.ingress [--http-port <port>]`. Port defaults to 8080.
+
+This runs an OpenAI-compatible HTTP server, a pre-processor, and a router in a single process. Engines / workers are auto-discovered when they call `register_llm`.
+
+Requires `etcd` and `nats-server -js`.
+
+This is the same as `dynamo-run in=http out=dyn`.
diff --git a/examples/llm/benchmarks/__init__.py → ...ts/ingress/src/dynamo/ingress/__init__.py b/examples/llm/benchmarks/__init__.py → ...ts/ingress/src/dynamo/ingress/__init__.py
diff --git a/components/ingress/src/dynamo/ingress/__main__.py b/components/ingress/src/dynamo/ingress/__main__.py
@@ -0,0 +1,7 @@
+#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#  SPDX-License-Identifier: Apache-2.0
+
+from dynamo.ingress.main import main
+
+if __name__ == "__main__":  # executed via `python -m dynamo.ingress`
+    main()
diff --git a/components/ingress/src/dynamo/ingress/main.py b/components/ingress/src/dynamo/ingress/main.py
@@ -0,0 +1,61 @@
+#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#  SPDX-License-Identifier: Apache-2.0
+
+# Usage: `python -m dynamo.ingress [args]`
+#
+# Start a frontend node. This runs:
+# - OpenAI HTTP server.
+# - Auto-discovery: Watches etcd for engine/worker registration (via `register_llm`).
+# - Pre-processor: Prompt templating and tokenization.
+# - Router, defaulting to round-robin (TODO: Add flags to enable KV routing).
+
+import argparse
+import asyncio
+
+import uvloop
+
+from dynamo.llm import EngineType, EntrypointArgs, make_engine, run_input
+from dynamo.runtime import DistributedRuntime
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Dynamo Frontend: HTTP+Pre-processor+Router",
+        formatter_class=argparse.RawTextHelpFormatter,  # To preserve multi-line help formatting
+    )
+    parser.add_argument(
+        "--kv-cache-block-size", type=int, help="KV cache block size (u32)."
+    )
+    parser.add_argument(
+        "--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
+    )
+    flags = parser.parse_args()
+
+    kwargs = {"http_port": flags.http_port}
+    if flags.kv_cache_block_size is not None:
+        kwargs["kv_cache_block_size"] = flags.kv_cache_block_size
+
+    return kwargs
+
+
+async def async_main():
+    runtime = DistributedRuntime(asyncio.get_running_loop(), False)
+    flags = parse_args()
+
+    # out=dyn
+    e = EntrypointArgs(EngineType.Dynamic, **flags)
+    engine = await make_engine(runtime, e)
+
+    # in=http
+    try:
+        await run_input(runtime, "http", engine)
+    except asyncio.exceptions.CancelledError:
+        pass
+
+
+def main():
+    uvloop.run(async_main())
+
+
+if __name__ == "__main__":  # also runnable directly as a script
+    main()
diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang
@@ -74,7 +74,32 @@ RUN apt-get update -y && \
 
 WORKDIR /workspace
 
-### TODO: Bring back UCX EFA setup once we are confident it works with IB devices
+### UCX EFA Setup ###
+RUN rm -rf /opt/hpcx/ucx
+RUN rm -rf /usr/local/ucx
+RUN cd /usr/local/src && \
+    git clone https://github.com/openucx/ucx.git && \
+    cd ucx &&                   \
+    git checkout v1.19.x &&     \
+    ./autogen.sh && ./configure \
+    --prefix=/usr/local/ucx     \
+    --enable-shared             \
+    --disable-static            \
+    --disable-doxygen-doc       \
+    --enable-optimizations      \
+    --enable-cma                \
+    --enable-devel-headers      \
+    --with-cuda=/usr/local/cuda \
+    --with-verbs                \
+    --with-efa                  \
+    --with-dm                   \
+    --with-gdrcopy=/usr/local   \
+    --enable-mt &&              \
+    make -j &&                  \
+    make -j install-strip &&    \
+    ldconfig
+
+ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
 ENV CPATH=/usr/include:$CPATH
 ENV PATH=/usr/bin:$PATH
 ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
@@ -127,21 +152,22 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 # Install NIXL Python module
-# TODO: Move gds_path selection based on arch into NIXL build
-RUN if [ "$ARCH" = "arm64" ]; then \
-        cd /opt/nixl && uv pip install . --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
-    else \
-        cd /opt/nixl && uv pip install . ; \
-    fi
+RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
+
+# Install the wheel
+# TODO: Move NIXL wheel install to the wheel_builder stage
+RUN uv pip install /workspace/wheels/nixl/*.whl
 
 # Install sglang
-# This commit references a NIXL fix that was releasted after the 0.4.8.post1 release https://github.com/sgl-project/sglang/pull/7330
+# This commit references a NIXL fix that was released after the 0.4.8.post1 release https://github.com/sgl-project/sglang/pull/7330
+# TODO: The built wheel should become an artifact that can be cached and reused in subsequent builds
 ARG SGLANG_COMMIT="bb9b608c86ebad7d9d01e29fe058bc184dc7285f"
 RUN --mount=type=cache,target=/root/.cache/uv \
     cd /opt && \
     git clone https://github.com/sgl-project/sglang.git && \
     cd sglang && \
     git checkout ${SGLANG_COMMIT} && \
+    # Install in editable mode for development
     uv pip install -e "python[all]"
 
 # Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager
@@ -380,20 +406,43 @@ ENV DYNAMO_HOME=/workspace
 ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
+### COPY NATS & ETCD ###
+# Copy nats and etcd from base image
+COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
+COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
+ENV PATH=/usr/local/bin/etcd/:$PATH
+
+# Copy UCX from base image as plugin for NIXL
+# Copy NIXL source from base image (required for NIXL plugins)
+COPY --from=base /usr/local/ucx /usr/local/ucx
+COPY --from=base /usr/local/nixl /usr/local/nixl
+ARG ARCH_ALT
+ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
+ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
+
 # Setup the python environment
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential python3-dev libnuma-dev && \
     rm -rf /var/lib/apt/lists/* && \
     uv venv $VIRTUAL_ENV --python 3.12 && \
     echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
 
+# Install SGLang and related packages (sgl-kernel, einops, sentencepiece) since they are not included in the runtime wheel
+# https://github.com/sgl-project/sglang/blob/v0.4.9.post1/python/pyproject.toml#L18-51
+RUN uv pip install "sglang[runtime_common]>=0.4.9.post1" && \
+    uv pip install einops && \
+    uv pip install sgl-kernel==0.2.4 && \
+    uv pip install sentencepiece
+
 # Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
 # Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
 COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
-RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
-    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
-    rm -r wheelhouse
+COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
+RUN uv pip install ai-dynamo --find-links wheelhouse && \
+    uv pip install ai-dynamo-runtime --find-links wheelhouse && \
+    uv pip install nixl --find-links wheelhouse && \
+    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/
 
 # Tell vllm to use the Dynamo LLM C API for KV Cache Routing
 ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
@@ -403,8 +452,9 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc
 
-# Copy examples
-COPY ./examples examples/
+# Copy the full build context (including examples) into /workspace and set up the Python path
+COPY . /workspace
+ENV PYTHONPATH=/workspace/examples/sglang/utils:$PYTHONPATH
 
-ENTRYPOINT [ "/usr/bin/bash" ]
+ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
diff --git a/container/build.sh b/container/build.sh
@@ -114,7 +114,7 @@ SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 VLLM_V1_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 VLLM_V1_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 
-NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
+NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
 NIXL_REPO=ai-dynamo/nixl.git
 
 NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b

diff --git a/deploy/CONTRIBUTING.md b/deploy/CONTRIBUTING.md
@@ -0,0 +1,163 @@
+# Contributing to Dynamo Deploy
+
+Welcome to the Dynamo Deploy project! This guide will help you get started with contributing to the deployment infrastructure and tooling for the Dynamo distributed inference platform.
+
+## Getting Started
+
+### Prerequisites
+
+
+### Quick Setup
+
+### Project Structure
+
+The deploy directory contains several key components:
+
+```
+deploy/
+├── cloud/                    # Cloud deployment platform
+│   ├── helm/                # Cloud platform Helm charts
+│   └── operator/            # Kubernetes operator (Go)
+├── helm/                    # Manual deployment Helm charts
+├── metrics/                 # Monitoring and observability
+├── sdk/                     # Python scripts
+└── inference-gateway/       # Gateway components
+```
+
+## Development Environment
+
+### Setting Up Your Environment
+
+
+### IDE Configuration
+
+**VS Code:**
+
+- Install Go extension
+- Install Python extension
+- Configure settings for Go formatting and linting
+- Add workspace settings for consistent formatting
+
+### Contribution Workflow Caveats
+
+- We do signed commits
+
+```bash
+git commit -S
+```
+
+- Every time you modify `deploy/cloud/helm/crds/templates/*.yaml`, please bump up the version of the CRD helm chart in
+    1. deploy/cloud/helm/platform/components/operator/Chart.yaml
+    2. deploy/cloud/helm/platform/Chart.yaml
+Then update the Helm chart dependencies:
+
+```bash
+cd deploy/cloud/helm/platform
+helm dependency update
+```
+
+#### Commit Message Guidelines
+
+Follow conventional commit format:
+
+- `feat:` new features
+- `fix:` bug fixes
+- `docs:` documentation changes
+- `test:` adding or updating tests
+- `refactor:` code refactoring
+- `perf:` performance improvements
+- `ci:` CI/CD changes
+
+Examples:
+
+```
+feat(operator): add support for custom resource limits
+fix(sdk): resolve service discovery timeout issue
+docs(helm): update deployment guide with new examples
+test(e2e): add integration tests for disaggregated serving
+```
+
+## Style Guide
+
+### Go Code Style (Operator)
+
+Follow standard Go conventions.
+
+
+### Python Code Style (SDK)
+
+Follow PEP 8 and use modern Python practices.
+
+
+### YAML/Helm Templates
+
+```yaml
+# Use consistent indentation (2 spaces)
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "dynamo.fullname" . }}
+  labels:
+    {{- include "dynamo.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "dynamo.selectorLabels" . | nindent 6 }}
+```
+
+## Testing
+
+Once you have an MR up and the standard checks pass, trigger the integration tests by adding the comment `/ok to test <COMMIT-ID>`.
+
+
+### Unit Tests
+
+**Go Tests (Operator):**
+
+```bash
+cd deploy/cloud/operator
+go test ./... -v
+go test -race ./...
+```
+
+**Python Tests (SDK):**
+
+```bash
+cd deploy/sdk
+pytest tests/ -v
+pytest tests/ --cov=dynamo.sdk
+```
+
+### Integration Tests
+
+**End-to-End Deployment Tests:**
+
+```bash
+# Run full deployment test suite
+pytest tests/serve/test_dynamo_serve.py -v
+
+# Test specific deployment scenarios
+pytest "tests/serve/test_dynamo_serve.py::test_serve_deployment[agg]" -v
+```
+
+**Operator Integration Tests:**
+
+```bash
+cd deploy/cloud/operator
+make test-e2e
+```
+
+### Writing Tests
+
+**Example Unit Test:**
+
+**Example Integration Test:**
+
+
+### Examples Testing
+
+Ensure documentation examples work.
+
+
+Thank you for contributing to Dynamo Deploy! 🚀