Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
fix: Make cli example more like dynamo-run
  • Loading branch information
grahamking committed Jul 8, 2025
commit 5a33bce27e8f5f6c38aa205d60e3675dd0e2972f
138 changes: 88 additions & 50 deletions examples/cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Example cli using the Python bindings.
# Usage: `python cli.py text mistralrs --model-path <your-model>`.
# If `--model-path` not provided defaults to Qwen3 0.6B.
# Must be in a virtualenv with the bindings (or wheel) installed.
# Example cli using the Python bindings, similar to `dynamo-run`.
# Usage: `python cli.py in=text out=mistralrs <your-model>`.
# Must be in a virtualenv with the Dynamo bindings (or wheel) installed.

import argparse
import asyncio
Expand All @@ -18,34 +17,36 @@


def parse_args():
"""
Parses command-line arguments for the program.
"""
in_mode = "text"
out_mode = "echo"
batch_file = None # Specific to in_mode="batch"

# List to hold arguments that argparse will process (flags and model path)
argparse_args = []

# --- Step 1: Manual Pre-parsing for 'in=' and 'out=' ---
# Iterate through sys.argv[1:] to extract in= and out=
# and collect remaining arguments for argparse.
for arg in sys.argv[1:]:
if arg.startswith("in="):
in_val = arg[len("in=") :]
if in_val.startswith("batch:"):
in_mode = "batch"
batch_file = in_val[len("batch:") :]
else:
in_mode = in_val
elif arg.startswith("out="):
out_mode = arg[len("out=") :]
else:
# This argument is not 'in=' or 'out=', so it's either a flag or the model path
argparse_args.append(arg)

# --- Step 2: Argparse for flags and the model path ---
parser = argparse.ArgumentParser(
description="Run a Dynamo LLM engine with configurable parameters.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, # Show default values in help
description="Dynamo CLI: Connect inputs to an engine",
formatter_class=argparse.RawTextHelpFormatter, # To preserve multi-line help formatting
)

# Positional arguments (replacing sys.argv[1] and sys.argv[2])
parser.add_argument(
"input_source",
type=str,
help="Input source for the engine: 'text', 'http', 'stdin', 'batch:file.jsonl', 'dyn://<name>'",
)
parser.add_argument(
"output_type",
type=str,
help="Output type (engine type): 'echo', 'mistralrs', 'llamacpp', 'dyn'",
)

# Optional arguments corresponding to EntrypointArgs fields
# model_path: Option<PathBuf>
parser.add_argument(
"--model-path",
type=Path,
default=Path("Qwen/Qwen3-0.6B"),
help="Path to the model directory.",
)
# model_name: Option<String>
parser.add_argument("--model-name", type=str, help="Name of the model to load.")
# model_config: Option<PathBuf>
Expand All @@ -69,8 +70,45 @@ def parse_args():
# http_port: Option<u16>
parser.add_argument("--http-port", type=int, help="HTTP port for the engine (u16).")

args = parser.parse_args()
return args
# TODO: Not yet used here
parser.add_argument(
"--tensor-parallel-size",
type=int,
help="Tensor parallel size for the model (e.g., 4).",
)

# Add the positional model argument.
# It's made optional (nargs='?') because its requirement depends on 'out_mode',
# which is handled in post-parsing validation.
parser.add_argument(
"model",
nargs="?", # Make it optional for argparse, we'll validate manually
help="Path to the model (e.g., Qwen/Qwen3-0.6B).\n" "Required unless out=dyn.",
)

# Parse the arguments that were not 'in=' or 'out='
flags = parser.parse_args(argparse_args)

# --- Step 3: Post-parsing Validation and Final Assignment ---

# Validate 'batch' mode requires a file path
if in_mode == "batch" and not batch_file:
parser.error("Batch mode requires a file path: in=batch:FILE")

# Validate model path requirement based on 'out_mode'
if out_mode != "dyn" and flags.model is None:
parser.error("Model path is required unless out=dyn.")

# Consolidate all parsed arguments into a dictionary
parsed_args = {
"in_mode": in_mode,
"out_mode": out_mode,
"batch_file": batch_file, # Will be None if in_mode is not "batch"
"model_path": flags.model,
"flags": flags,
}

return parsed_args


async def run():
Expand All @@ -79,39 +117,39 @@ async def run():

args = parse_args()

input = args.input_source
output = args.output_type

engine_type_map = {
"echo": EngineType.Echo,
"mistralrs": EngineType.MistralRs,
"llamacpp": EngineType.LlamaCpp,
"dyn": EngineType.Dynamic,
}
engine_type = engine_type_map.get(output)
out_mode = args["out_mode"]
engine_type = engine_type_map.get(out_mode)
if engine_type is None:
print(f"Unsupported output type: {output}")
print(f"Unsupported output type: {out_mode}")
sys.exit(1)

# TODO: The "vllm", "sglang" and "trtllm" cases should call Python directly

entrypoint_kwargs = {"model_path": args.model_path}
if args.model_name is not None:
entrypoint_kwargs["model_name"] = args.model_name
if args.model_config is not None:
entrypoint_kwargs["model_config"] = args.model_config
if args.context_length is not None:
entrypoint_kwargs["context_length"] = args.context_length
if args.template_file is not None:
entrypoint_kwargs["template_file"] = args.template_file
if args.kv_cache_block_size is not None:
entrypoint_kwargs["kv_cache_block_size"] = args.kv_cache_block_size
if args.http_port is not None:
entrypoint_kwargs["http_port"] = args.http_port
entrypoint_kwargs = {"model_path": args["model_path"]}

flags = args["flags"]
if flags.model_name is not None:
entrypoint_kwargs["model_name"] = flags.model_name
if flags.model_config is not None:
entrypoint_kwargs["model_config"] = flags.model_config
if flags.context_length is not None:
entrypoint_kwargs["context_length"] = flags.context_length
if flags.template_file is not None:
entrypoint_kwargs["template_file"] = flags.template_file
if flags.kv_cache_block_size is not None:
entrypoint_kwargs["kv_cache_block_size"] = flags.kv_cache_block_size
if flags.http_port is not None:
entrypoint_kwargs["http_port"] = flags.http_port

e = EntrypointArgs(engine_type, **entrypoint_kwargs)
engine = await make_engine(runtime, e)
await run_input(runtime, input, engine)
await run_input(runtime, args["in_mode"], engine)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion lib/bindings/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,11 @@ maturin develop --uv
5. Experimental: To allow using mistral.rs and llama.cpp via the bindings, build with feature flags:

```
maturin develop --features mistralrs,llamacpp
maturin develop --features mistralrs,llamacpp --release
```

`--release` is optional. It builds slower but the resulting library is significantly faster.

See `examples/cli/cli.py` for usage.

They will both be built for CUDA by default. If you see a runtime error `CUDA_ERROR_STUB_LIBRARY` this is because
Expand Down
Loading