llama_cpp/llama.py (+7 lines changed: 7 additions & 0 deletions)

```diff
@@ -72,6 +72,7 @@ def __init__(
         split_mode: int = llama_cpp.LLAMA_SPLIT_MODE_LAYER,
         main_gpu: int = 0,
         tensor_split: Optional[List[float]] = None,
+        rpc_servers: Optional[str] = None,
         vocab_only: bool = False,
         use_mmap: bool = True,
         use_mlock: bool = False,
@@ -150,6 +151,7 @@ def __init__(
             split_mode: How to split the model across GPUs. See llama_cpp.LLAMA_SPLIT_* for options.
             main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_LAYER: ignored
             tensor_split: How split tensors should be distributed across GPUs. If None, the model is not split.
+            rpc_servers: Comma-separated list of RPC servers to use for offloading
             vocab_only: Only load the vocabulary no weights.
             use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
@@ -221,6 +223,11 @@ def __init__(
         )  # 0x7FFFFFFF is INT32 max, will be auto set to all layers
```
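The body of the third hunk (`@@ -221,6 +223,11 @@`) is collapsed in the view above, so the five added lines are not shown. A minimal sketch of what they plausibly do, assuming llama.cpp is built with RPC support and that the underlying `llama_model_params` struct exposes an `rpc_servers` field (both are assumptions, not confirmed by the visible diff):

```python
# Hypothetical reconstruction of the collapsed hunk -- not taken from the diff.
# Forward the comma-separated server list to the C-level model parameters.
if rpc_servers is not None:
    self.model_params.rpc_servers = rpc_servers.encode("utf-8")
    self._rpc_servers = rpc_servers
else:
    self._rpc_servers = None
```

With the new parameter in place, a caller could distribute offloaded layers across remote rpc-server instances; the model path and host addresses below are placeholders:

```python
from llama_cpp import Llama

llm = Llama(
    model_path="models/7B/ggml-model.gguf",  # placeholder path
    n_gpu_layers=-1,  # offload all layers
    rpc_servers="192.168.1.10:50052,192.168.1.11:50052",  # placeholder hosts
)
```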
llama_cpp/server/settings.py (+4 lines changed: 4 additions & 0 deletions)

```diff
@@ -58,6 +58,10 @@ class ModelSettings(BaseSettings):
         default=None,
         description="List of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.",
     )
+    rpc_servers: Optional[str] = Field(
+        default=None,
+        description="Comma-separated list of RPC servers for offloading",
+    )
     # Context Params
     seed: int = Field(
         default=llama_cpp.LLAMA_DEFAULT_SEED, description="Random seed. -1 for random."
```
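Since `ModelSettings` is a pydantic `BaseSettings` class whose fields are exposed as server options, the new field should be settable when launching the OpenAI-compatible server. A hedged usage sketch, with the flag name inferred from the field name and the host addresses as placeholders:

```bash
# Launch the server with two RPC backends (addresses are placeholders).
python -m llama_cpp.server \
  --model models/7B/ggml-model.gguf \
  --rpc_servers 192.168.1.10:50052,192.168.1.11:50052
```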