|
2 | 2 | # SPDX-License-Identifier: Apache-2.0 |
3 | 3 |
|
4 | 4 | # Example cli using the Python bindings, similar to `dynamo-run`. |
| 5 | +# |
5 | 6 | # Usage: `python cli.py in=text out=mistralrs <your-model>`. |
| 7 | +# `in` can be: |
| 8 | +# - "http": OpenAI-compatible HTTP server |
| 9 | +# - "text": Interactive text chat |
| 10 | +# - "batch:<file.jsonl>": Run all the prompts in the JSONL file and write the output to a JSONL file in the current directory. |
| 11 | +# - "stdin": Allows you to pipe something in: `echo prompt | python cli.py in=stdin out=...` |
| 12 | +# - "dyn://name": Connect to nats/etcd and listen for requests from frontend. |
| 13 | +# |
| 14 | +# `out` can be: |
| 15 | +# - "dyn": Run as the frontend node. Auto-discover workers and route traffic to them. |
| 16 | +# - "mistralrs", "llamacpp", "sglang", "vllm", "trtllm", "echo": An LLM worker. |
| 17 | +# |
6 | 18 | # Must be in a virtualenv with the Dynamo bindings (or wheel) installed. |
| 19 | +# |
| 20 | +# To use mistralrs or llamacpp you must build the library with those features: |
| 21 | +# ``` |
| 22 | +# maturin develop --features mistralrs,llamacpp --release |
| 23 | +# ``` |
| 24 | +# |
| 25 | +# `--release` is optional. The build takes longer, but the resulting library is significantly faster. |
| 26 | +# |
| 27 | +# They will both be built for CUDA by default. If you see a runtime error `CUDA_ERROR_STUB_LIBRARY`, this is because |
| 28 | +# the stub `libcuda.so` is earlier on the library search path than the real libcuda. Try removing |
| 29 | +# the `rpath` from the library: |
| 30 | +# |
| 31 | +# ``` |
| 32 | +# patchelf --set-rpath '' _core.cpython-312-x86_64-linux-gnu.so |
| 33 | +# ``` |
| 34 | +# |
| 35 | +# If you include the `llamacpp` feature flag, `libllama.so` and `libggml.so` (and family) will need to be |
| 36 | +# available at runtime. |
| 37 | +# |
7 | 38 |
|
8 | 39 | import argparse |
9 | 40 | import asyncio |
@@ -47,7 +78,8 @@ def parse_args(): |
47 | 78 |
|
48 | 79 | # --- Step 2: Argparse for flags and the model path --- |
49 | 80 | parser = argparse.ArgumentParser( |
50 | | - description="Dynamo CLI: Connect inputs to an engine", |
| 81 | + description="Dynamo example CLI: Connect inputs to an engine", |
| 82 | + usage="python cli.py in=text out=mistralrs <your-model>", |
51 | 83 | formatter_class=argparse.RawTextHelpFormatter, # To preserve multi-line help formatting |
52 | 84 | ) |
53 | 85 |
|
|
0 commit comments