diff --git a/Cargo.lock b/Cargo.lock index 86be425596..621e6e7207 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -240,7 +240,7 @@ checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" [[package]] name = "async-openai-macros" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "proc-macro2", "quote", @@ -1836,7 +1836,7 @@ dependencies = [ [[package]] name = "dynamo-async-openai" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "async-openai-macros", "backoff", @@ -1862,7 +1862,7 @@ dependencies = [ [[package]] name = "dynamo-engine-llamacpp" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "async-stream", "dynamo-llm", @@ -1874,7 +1874,7 @@ dependencies = [ [[package]] name = "dynamo-engine-mistralrs" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "async-stream", @@ -1892,7 +1892,7 @@ dependencies = [ [[package]] name = "dynamo-llm" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "ahash", "akin", @@ -1975,7 +1975,7 @@ dependencies = [ [[package]] name = "dynamo-run" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "async-stream", @@ -2004,7 +2004,7 @@ dependencies = [ [[package]] name = "dynamo-runtime" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "arc-swap", @@ -2061,7 +2061,7 @@ dependencies = [ [[package]] name = "dynamo-tokens" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "bytemuck", "derive-getters", @@ -3818,7 +3818,7 @@ checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "libdynamo_llm" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "async-once-cell", @@ -4130,7 +4130,7 @@ dependencies = [ [[package]] name = "metrics" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "axum 0.8.4", "clap 4.5.42", @@ -6037,7 +6037,7 @@ dependencies = [ [[package]] name = "router" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "clap 4.5.42", "dynamo-llm", diff --git a/Cargo.toml b/Cargo.toml index 11462c2dde..da4f5ace34 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ members = [ resolver = "3" [workspace.package] -version = "0.4.0+post0" +version = "0.4.1" edition = "2021" description = "Dynamo Inference Framework" authors = ["NVIDIA Inc. "] @@ -28,10 +28,10 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"] [workspace.dependencies] # Local crates -dynamo-runtime = { path = "lib/runtime", version = "0.4.0" } -dynamo-llm = { path = "lib/llm", version = "0.4.0" } -dynamo-tokens = { path = "lib/tokens", version = "0.4.0" } -dynamo-async-openai = { path = "lib/async-openai", version = "0.4.0", features = ["byot", "rustls"]} +dynamo-runtime = { path = "lib/runtime", version = "0.4.1" } +dynamo-llm = { path = "lib/llm", version = "0.4.1" } +dynamo-tokens = { path = "lib/tokens", version = "0.4.1" } +dynamo-async-openai = { path = "lib/async-openai", version = "0.4.1", features = ["byot", "rustls"]} # External dependencies anyhow = { version = "1" } diff --git a/docs/support_matrix.md b/docs/support_matrix.md index 382aad2c78..98e802dd56 100644 --- a/docs/support_matrix.md +++ b/docs/support_matrix.md @@ -58,17 +58,17 @@ If you are using a **GPU**, the following GPU models and architectures are suppo | **Python Package** | **Version** | glibc version | CUDA Version | | :----------------- | :------------ | :----------------------------------- | :----------- | -| ai-dynamo | 0.4.0 | >=2.28 | | -| ai-dynamo-runtime | 0.4.0 | >=2.28 (Python 3.12 has known issues)| | -| NIXL | 0.5.0 | >=2.27 | >=11.8 | +| ai-dynamo | 0.4.1 | >=2.28 | | +| ai-dynamo-runtime | 0.4.1 | >=2.28 (Python 3.12 has known issues)| | +| NIXL | 0.4.1 | >=2.27 | >=11.8 | ### Build Dependency | **Build Dependency** | **Version** | | :------------------- | :------------------------------------------------------------------------------- | | **Base Container** | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) | -| **TensorRT-LLM** | 1.0.0rc² | -| **NIXL** | 0.4.0 | +| **TensorRT-LLM** | 1.0.0rc4 | +| **NIXL** | 0.4.1 | > [!Important] > ² Specific versions of TensorRT-LLM supported by Dynamo are subject to change. diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index 48d866eb5d..4eeb72591a 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -198,37 +198,9 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" -[[package]] -name = "async-openai" -version = "0.29.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0864223dd927e9418f2fdb7fd773b34482518596c17693ff2cdb216261d22913" -dependencies = [ - "async-openai-macros", - "backoff", - "base64 0.22.1", - "bytes", - "derive_builder", - "eventsource-stream", - "futures", - "rand 0.9.2", - "reqwest", - "reqwest-eventsource", - "secrecy", - "serde", - "serde_json", - "thiserror 2.0.15", - "tokio", - "tokio-stream", - "tokio-util", - "tracing", -] - [[package]] name = "async-openai-macros" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398" +version = "0.4.1" dependencies = [ "proc-macro2", "quote", @@ -1224,15 +1196,38 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "dynamo-async-openai" +version = "0.4.1" +dependencies = [ + "async-openai-macros", + "backoff", + "base64 0.22.1", + "bytes", + "derive_builder", + "eventsource-stream", + "futures", + "rand 0.9.2", + "reqwest", + "reqwest-eventsource", + "secrecy", + "serde", + "serde_json", + "thiserror 2.0.15", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", +] + [[package]] name = "dynamo-llm" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "ahash", "akin", "anyhow", "async-nats", - "async-openai", "async-stream", "async-trait", "async_zmq", @@ -1249,6 +1244,7 @@ dependencies = [ "derive-getters", "derive_builder", "dialoguer", + "dynamo-async-openai", "dynamo-runtime", "either", "erased-serde", @@ -1298,15 +1294,15 @@ dependencies = [ [[package]] name = "dynamo-py3" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", - "async-openai", "async-stream", "async-trait", "cudarc 0.16.6", "derive-getters", "dlpark", + "dynamo-async-openai", "dynamo-llm", "dynamo-runtime", "either", @@ -1329,7 +1325,7 @@ dependencies = [ [[package]] name = "dynamo-runtime" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "arc-swap", diff --git a/lib/bindings/python/Cargo.toml b/lib/bindings/python/Cargo.toml index 30f1945a7c..81d5ee00cc 100644 --- a/lib/bindings/python/Cargo.toml +++ b/lib/bindings/python/Cargo.toml @@ -19,7 +19,7 @@ [package] name = "dynamo-py3" -version = "0.4.0+post0" +version = "0.4.1" edition = "2021" authors = ["NVIDIA"] license = "Apache-2.0" diff --git a/lib/bindings/python/pyproject.toml b/lib/bindings/python/pyproject.toml index e4b0eb96c4..7a7167c89e 100644 --- a/lib/bindings/python/pyproject.toml +++ b/lib/bindings/python/pyproject.toml @@ -16,7 +16,7 @@ [project] name = "ai-dynamo-runtime" -version = "0.4.0.post0" +version = "0.4.1" description = "Dynamo Inference Framework Runtime" readme = "README.md" authors = [ diff --git a/lib/runtime/examples/Cargo.lock b/lib/runtime/examples/Cargo.lock index d5ab1a17c7..31278cc7d8 100644 --- a/lib/runtime/examples/Cargo.lock +++ b/lib/runtime/examples/Cargo.lock @@ -648,7 +648,7 @@ dependencies = [ [[package]] name = "dynamo-runtime" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "arc-swap", @@ -1020,7 +1020,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hello_world" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "dynamo-runtime", ] @@ -2492,7 +2492,7 @@ dependencies = [ [[package]] name = "service_metrics" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "dynamo-runtime", "futures", @@ -2668,7 +2668,7 @@ dependencies = [ [[package]] name = "system_metrics" -version = "0.4.0+post0" +version = "0.4.1" dependencies = [ "anyhow", "dynamo-runtime", @@ -2881,6 +2881,8 @@ dependencies = [ "bytes", "futures-core", "futures-sink", + "futures-util", + "hashbrown", "pin-project-lite", "tokio", ] diff --git a/lib/runtime/examples/Cargo.toml b/lib/runtime/examples/Cargo.toml index cb9a7cb6b3..ebecf2a55e 100644 --- a/lib/runtime/examples/Cargo.toml +++ b/lib/runtime/examples/Cargo.toml @@ -22,7 +22,7 @@ members = [ resolver = "3" [workspace.package] -version = "0.4.0+post0" +version = "0.4.1" edition = "2021" authors = ["NVIDIA"] license = "Apache-2.0" diff --git a/pyproject.toml b/pyproject.toml index 5d4800853e..f30a1429d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ [project] name = "ai-dynamo" -version = "0.4.0.post0" +version = "0.4.1" description = "Distributed Inference Framework" readme = "README.md" authors = [ @@ -13,7 +13,7 @@ license = { text = "Apache-2.0" } license-files = ["LICENSE"] requires-python = ">=3.10" dependencies = [ - "ai-dynamo-runtime==0.4.0.post0", + "ai-dynamo-runtime==0.4.1", "pytest>=8.3.4", "types-psutil>=7.0.0.20250218", "kubernetes>=32.0.1,<33.0.0",