diff --git a/container/Dockerfile.none b/container/Dockerfile.none index 0d55d4f165..cb90e1201c 100644 --- a/container/Dockerfile.none +++ b/container/Dockerfile.none @@ -32,7 +32,7 @@ ENV RUSTUP_HOME=/usr/local/rustup \ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \ chmod +x rustup-init && \ - ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ + ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ rm rustup-init && \ chmod -R a+w $RUSTUP_HOME $CARGO_HOME diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang index 04362be137..28746eaa0f 100644 --- a/container/Dockerfile.sglang +++ b/container/Dockerfile.sglang @@ -294,13 +294,11 @@ COPY components /workspace/components COPY launch /workspace/launch COPY deploy/sdk /workspace/deploy/sdk -# Build Rust crate binaries packaged with the wheel -RUN cargo build --release --locked --features mistralrs,python,dynamo-llm/block-manager \ - -p dynamo-run \ - -p llmctl \ - # Multiple http named crates are present in dependencies, need to specify the path - -p file://$PWD/components/http \ - -p metrics +RUN cargo build \ + --release \ + --locked \ + --features dynamo-llm/block-manager \ + --workspace # Build dynamo wheel RUN uv build --wheel --out-dir /workspace/dist && \ @@ -322,21 +320,13 @@ ENV CARGO_TARGET_DIR=/workspace/target WORKDIR /workspace -COPY --from=wheel_builder /workspace/dist/ /workspace/dist/ -COPY --from=wheel_builder /workspace/target/ /workspace/target/ +COPY --from=wheel_builder /workspace /workspace COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl # Copy Cargo cache to avoid re-downloading dependencies COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME -COPY . /workspace - -# Build rest of the crates -# Need to figure out rust caching to avoid rebuilding and remove exclude flags -RUN cargo build --release --locked --features block-manager --workspace \ - --exclude dynamo-run \ - --exclude llmctl \ - --exclude file://$PWD/components/http \ - --exclude metrics +# Build C bindings, creates lib/bindings/c/include +RUN cd /workspace/lib/bindings/c && cargo build --release --locked # Package the bindings RUN mkdir -p /opt/dynamo/bindings/wheels && \ diff --git a/container/Dockerfile.tensorrt_llm b/container/Dockerfile.tensorrt_llm index db9ca0d53d..2fbfa12499 100644 --- a/container/Dockerfile.tensorrt_llm +++ b/container/Dockerfile.tensorrt_llm @@ -204,7 +204,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ # TODO: Add SHA check back based on RUSTARCH chmod +x rustup-init && \ - ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ + ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ rm rustup-init && \ chmod -R a+w $RUSTUP_HOME $CARGO_HOME @@ -273,13 +273,11 @@ COPY components /workspace/components COPY launch /workspace/launch COPY deploy/sdk /workspace/deploy/sdk -# Build Rust crate binaries packaged with the wheel -RUN cargo build --release --locked \ - -p dynamo-run \ - -p llmctl \ - # Multiple http named crates are present in dependencies, need to specify the path - -p file://$PWD/components/http \ - -p metrics +RUN cargo build \ + --release \ + --locked \ + --features dynamo-llm/block-manager \ + --workspace # Build dynamo wheels RUN uv build --wheel --out-dir /workspace/dist && \ @@ -297,20 +295,13 @@ RUN uv build --wheel --out-dir /workspace/dist && \ FROM build AS dev WORKDIR /workspace -COPY --from=wheel_builder /workspace/dist/ /workspace/dist/ -COPY --from=wheel_builder /workspace/target/ /workspace/target/ + +COPY --from=wheel_builder /workspace /workspace # Copy Cargo cache to avoid re-downloading dependencies COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME -COPY . /workspace - -# Build rest of the crates -# Need to figure out rust caching to avoid rebuilding and remove exclude flags -RUN cargo build --release --locked --workspace \ - --exclude dynamo-run \ - --exclude llmctl \ - --exclude file://$PWD/components/http \ - --exclude metrics +# Build C bindings, creates lib/bindings/c/include +RUN cd /workspace/lib/bindings/c && cargo build --release --locked # Package the bindings RUN mkdir -p /opt/dynamo/bindings/wheels && \ diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm index e6b5661333..507a4b1831 100644 --- a/container/Dockerfile.vllm +++ b/container/Dockerfile.vllm @@ -100,9 +100,9 @@ RUN cd /usr/local/src && \ ldconfig ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH -ENV CPATH=/usr/include:$CPATH +ENV CPATH=/usr/include ENV PATH=/usr/bin:$PATH -ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH +ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig SHELL ["/bin/bash", "-c"] WORKDIR /workspace @@ -259,7 +259,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ # TODO: Add SHA check back based on RUSTARCH chmod +x rustup-init && \ - ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ + ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ rm rustup-init && \ chmod -R a+w $RUSTUP_HOME $CARGO_HOME @@ -372,13 +372,11 @@ COPY components /workspace/components COPY launch /workspace/launch COPY deploy/sdk /workspace/deploy/sdk -# Build Rust crate binaries packaged with the wheel -RUN cargo build --release --locked --features dynamo-llm/block-manager \ - -p dynamo-run \ - -p llmctl \ - # Multiple http named crates are present in dependencies, need to specify the path - -p file://$PWD/components/http \ - -p metrics +RUN cargo build \ + --release \ + --locked \ + --features dynamo-llm/block-manager \ + --workspace # Build dynamo wheel RUN uv build --wheel --out-dir /workspace/dist && \ @@ -400,21 +398,17 @@ ENV CARGO_TARGET_DIR=/workspace/target WORKDIR /workspace -COPY --from=wheel_builder /workspace/dist/ /workspace/dist/ -COPY --from=wheel_builder /workspace/target/ /workspace/target/ +COPY --from=wheel_builder /workspace /workspace COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl # Copy Cargo cache to avoid re-downloading dependencies COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME -COPY . /workspace - -# Build rest of the crates -# Need to figure out rust caching to avoid rebuilding and remove exclude flags -RUN cargo build --release --locked --features block-manager --workspace \ - --exclude dynamo-run \ - --exclude llmctl \ - --exclude file://$PWD/components/http \ - --exclude metrics +# Build C bindings, creates lib/bindings/c/include +# +# TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the +# `lib/bindings/c/include` folder that build.rs generated across. +# I couldn't get that to work, hence TODO. +RUN cd /workspace/lib/bindings/c && cargo build --release --locked # Package the bindings RUN mkdir -p /opt/dynamo/bindings/wheels && \ diff --git a/docs/guides/dynamo_run.md b/docs/guides/dynamo_run.md index 8c5d26a643..c6245631d6 100644 --- a/docs/guides/dynamo_run.md +++ b/docs/guides/dynamo_run.md @@ -305,10 +305,10 @@ If you have multiple GPUs, mistral.rs does automatic tensor parallelism. You do #### llamacpp -Currently [llama.cpp](https://github.com/ggml-org/llama.cpp) is not included by default. Build it like this: +[llama.cpp](https://github.com/ggml-org/llama.cpp) is built for CPU by default. For an optimized build pass the appropriate feature flag (highly recommended): ``` -cargo build --features llamacpp[,cuda|metal|vulkan] -p dynamo-run +cargo build --features cuda|metal|vulkan -p dynamo-run ``` ``` diff --git a/launch/dynamo-run/Cargo.toml b/launch/dynamo-run/Cargo.toml index e8a00fede6..2f0ac76cb7 100644 --- a/launch/dynamo-run/Cargo.toml +++ b/launch/dynamo-run/Cargo.toml @@ -1,17 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. [package] name = "dynamo-run" @@ -26,7 +14,7 @@ description = "Dynamo Run CLI" [features] # Build with `--no-default-features` to disable these defaults -default = ["mistralrs"] +default = ["mistralrs", "llamacpp"] mistralrs = ["dep:dynamo-engine-mistralrs"] llamacpp = ["dep:dynamo-engine-llamacpp"]