diff --git a/Cargo.lock b/Cargo.lock index aa40e16063b..4a506af1c7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -814,9 +814,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.20" +version = "1.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" +checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" dependencies = [ "jobserver", "libc", @@ -1555,7 +1555,7 @@ dependencies = [ [[package]] name = "dynamo-engine-llamacpp" -version = "0.2.1" +version = "0.3.0" dependencies = [ "async-stream", "dynamo-llm", @@ -1567,7 +1567,7 @@ dependencies = [ [[package]] name = "dynamo-engine-mistralrs" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "async-openai", @@ -1586,7 +1586,7 @@ dependencies = [ [[package]] name = "dynamo-llm" -version = "0.2.1" +version = "0.3.0" dependencies = [ "akin", "aligned-vec", @@ -1658,7 +1658,7 @@ dependencies = [ [[package]] name = "dynamo-run" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "async-openai", @@ -1686,7 +1686,7 @@ dependencies = [ [[package]] name = "dynamo-runtime" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "assert_matches", @@ -1735,7 +1735,7 @@ dependencies = [ [[package]] name = "dynamo-tokens" -version = "0.2.1" +version = "0.3.0" dependencies = [ "bytemuck", "derive-getters", @@ -2733,18 +2733,6 @@ dependencies = [ "match_token", ] -[[package]] -name = "http" -version = "0.2.1" -dependencies = [ - "clap", - "dynamo-llm", - "dynamo-runtime", - "serde", - "serde_json", - "tokio", -] - [[package]] name = "http" version = "0.2.12" @@ -2756,6 +2744,18 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "0.3.0" +dependencies = [ + "clap", + "dynamo-llm", + "dynamo-runtime", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "http" version = "1.3.1" @@ -3364,7 +3364,7 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libdynamo_llm" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "async-once-cell", @@ -3477,7 +3477,7 @@ dependencies = [ [[package]] name = "llmctl" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "clap", @@ -3677,7 +3677,7 @@ dependencies = [ [[package]] name = "metrics" -version = "0.2.1" +version = "0.3.0" dependencies = [ "axum 0.6.20", "clap", @@ -4084,13 +4084,14 @@ dependencies = [ [[package]] name = "nixl-sys" -version = "0.2.1-rc.3" +version = "0.3.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfeec33e0229249e8688309a70c677f522446f9ac92105a85aad4a5ceef2dd2f" +checksum = "6a1727039c315bf41aeadcdbea02522b8a3df69a77bd732996243502a5fb6f07" dependencies = [ "bindgen 0.71.1", "cc", "libc", + "os_info", "pkg-config", "serde", "thiserror 2.0.12", @@ -4387,6 +4388,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "os_info" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fc863e2ca13dc2d5c34fb22ea4a588248ac14db929616ba65c45f21744b1e9" +dependencies = [ + "log", + "serde", + "windows-sys 0.52.0", +] + [[package]] name = "overload" version = "0.1.1" @@ -5333,7 +5345,7 @@ dependencies = [ [[package]] name = "router" -version = "0.2.1" +version = "0.3.0" dependencies = [ "clap", "dynamo-llm", diff --git a/Cargo.toml b/Cargo.toml index e19903317dd..c64295a6c88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ members = [ resolver = "3" [workspace.package] -version = "0.2.1" +version = "0.3.0" edition = "2021" description = "Dynamo Inference Framework" authors = ["NVIDIA Inc. "] @@ -39,9 +39,9 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"] [workspace.dependencies] # Local crates -dynamo-runtime = { path = "lib/runtime", version = "0.2.1" } -dynamo-llm = { path = "lib/llm", version = "0.2.1" } -dynamo-tokens = { path = "lib/tokens", version = "0.2.1" } +dynamo-runtime = { path = "lib/runtime", version = "0.3.0" } +dynamo-llm = { path = "lib/llm", version = "0.3.0" } +dynamo-tokens = { path = "lib/tokens", version = "0.3.0" } # External dependencies anyhow = { version = "1" } diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm index 507a4b1831f..4ba0f495799 100644 --- a/container/Dockerfile.vllm +++ b/container/Dockerfile.vllm @@ -169,7 +169,7 @@ RUN uv pip install /workspace/wheels/nixl/*.whl ARG VLLM_REF="0.8.4" ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch" ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm" -ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post1" +ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post2" ARG VLLM_MAX_JOBS=4 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ --mount=type=cache,target=/root/.cache/uv \ diff --git a/container/build.sh b/container/build.sh index 64f6f6f0bda..9cd4ef3fa50 100755 --- a/container/build.sh +++ b/container/build.sh @@ -109,7 +109,7 @@ NONE_BASE_IMAGE_TAG="24.04" SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" -NIXL_COMMIT=78695c2900cd7fff506764377386592dfc98e87e +NIXL_COMMIT=f531404be4866d85ed618b3baf4008c636798d63 NIXL_REPO=ai-dynamo/nixl.git NO_CACHE="" diff --git a/docs/support_matrix.md b/docs/support_matrix.md index a7cf7361c8f..53774b0d4cf 100644 --- a/docs/support_matrix.md +++ b/docs/support_matrix.md @@ -62,21 +62,21 @@ For **Linux**, the **ARM64** support is experimental and may have limitations. W ### Runtime Dependency | **Python Package** | **Version** | glibc version | CUDA Version | |--------------------|---------------|----------------------|--------------| -| ai-dynamo | 0.2.1 | >=2.28 | | -| ai-dynamo-runtime | 0.2.1 | >=2.28 | | -| ai-dynamo-vllm | 0.8.4.post1* | >=2.28 (recommended) | | -| NIXL | 0.2.1 | >=2.27 | >=11.8 | +| ai-dynamo | 0.3.0 | >=2.28 | | +| ai-dynamo-runtime | 0.3.0 | >=2.28 | | +| ai-dynamo-vllm | 0.8.4.post2* | >=2.28 (recommended) | | +| NIXL | 0.3.0 | >=2.27 | >=11.8 | ### Build Dependency | **Build Dependency** | **Version** | |----------------------|-------------| | **Base Container** | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) | -| **ai-dynamo-vllm** |0.8.4.post1* | +| **ai-dynamo-vllm** |0.8.4.post2* | | **TensorRT-LLM** | 0.19.0** | -| **NIXL** | 0.2.1 | +| **NIXL** | 0.3.0 | > **Note**: -> - *ai-dynamo-vllm v0.8.4.post1 is a customized patch of v0.8.4 from vLLM. +> - *ai-dynamo-vllm v0.8.4.post2 is a customized patch of v0.8.4 from vLLM. > - **Specific versions of TensorRT-LLM supported by Dynamo are subject to change. diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index fcd3403e7e4..b0890f08ca7 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -577,9 +577,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.20" +version = "1.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" +checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" dependencies = [ "jobserver", "libc", @@ -1092,7 +1092,7 @@ dependencies = [ [[package]] name = "dynamo-llm" -version = "0.2.1" +version = "0.3.0" dependencies = [ "akin", "anyhow", @@ -1155,7 +1155,7 @@ dependencies = [ [[package]] name = "dynamo-py3" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "async-openai", @@ -1180,7 +1180,7 @@ dependencies = [ [[package]] name = "dynamo-runtime" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "async-nats", @@ -2810,13 +2810,14 @@ dependencies = [ [[package]] name = "nixl-sys" -version = "0.2.1" +version = "0.3.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84bf333c75733cad60b29873d84168f841c6bd5207ae9dfbda7490a99c1ebe94" +checksum = "6a1727039c315bf41aeadcdbea02522b8a3df69a77bd732996243502a5fb6f07" dependencies = [ "bindgen", "cc", "libc", + "os_info", "pkg-config", "serde", "thiserror 2.0.12", @@ -3065,6 +3066,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "os_info" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fc863e2ca13dc2d5c34fb22ea4a588248ac14db929616ba65c45f21744b1e9" +dependencies = [ + "log", + "serde", + "windows-sys 0.52.0", +] + [[package]] name = "overload" version = "0.1.1" diff --git a/lib/bindings/python/Cargo.toml b/lib/bindings/python/Cargo.toml index 16283feb758..3eb022ffd41 100644 --- a/lib/bindings/python/Cargo.toml +++ b/lib/bindings/python/Cargo.toml @@ -19,7 +19,7 @@ [package] name = "dynamo-py3" -version = "0.2.1" +version = "0.3.0" edition = "2021" authors = ["NVIDIA"] license = "Apache-2.0" @@ -75,3 +75,4 @@ pyo3-async-runtimes = { version = "0.23.0", default-features = false, features = pythonize = "0.23" dlpark = { version = "0.5", features = ["pyo3", "half"], optional = true } + diff --git a/lib/llm/Cargo.toml b/lib/llm/Cargo.toml index 9540f7fe0cd..cf3c360e60b 100644 --- a/lib/llm/Cargo.toml +++ b/lib/llm/Cargo.toml @@ -81,7 +81,7 @@ regex = "1" rayon = "1" # block_manager -nixl-sys = { version = "0.2.1-rc.3", optional = true } +nixl-sys = { version = "0.3.0-rc.2", optional = true } cudarc = { version = "0.16.2", features = ["cuda-12020"], optional = true } ndarray = { version = "0.16", optional = true } nix = { version = "0.26", optional = true } diff --git a/lib/runtime/examples/Cargo.lock b/lib/runtime/examples/Cargo.lock index 7d1f5dbd7a6..6f44ba0fbb2 100644 --- a/lib/runtime/examples/Cargo.lock +++ b/lib/runtime/examples/Cargo.lock @@ -623,7 +623,7 @@ dependencies = [ [[package]] name = "dynamo-runtime" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "async-nats", @@ -994,7 +994,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hello_world" -version = "0.2.1" +version = "0.3.0" dependencies = [ "dynamo-runtime", ] @@ -2306,7 +2306,7 @@ dependencies = [ [[package]] name = "service_metrics" -version = "0.2.1" +version = "0.3.0" dependencies = [ "dynamo-runtime", "futures", diff --git a/lib/runtime/examples/Cargo.toml b/lib/runtime/examples/Cargo.toml index 3d01b7203b6..8f6846bf216 100644 --- a/lib/runtime/examples/Cargo.toml +++ b/lib/runtime/examples/Cargo.toml @@ -21,7 +21,7 @@ members = [ resolver = "3" [workspace.package] -version = "0.2.1" +version = "0.3.0" edition = "2021" authors = ["NVIDIA"] license = "Apache-2.0" diff --git a/pyproject.toml b/pyproject.toml index 88d4c1281a9..a5e2ffc2d15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ [project] name = "ai-dynamo" -version = "0.2.1" +version = "0.3.0" description = "Distributed Inference Framework" readme = "README.md" authors = [ @@ -29,7 +29,7 @@ dependencies = [ "bentoml==1.4.8", "types-psutil==7.0.0.20250218", "kubernetes==32.0.1", - "ai-dynamo-runtime==0.2.1", + "ai-dynamo-runtime==0.3.0", "fastapi==0.115.6", "distro", "typer",