@@ -18,9 +18,9 @@ pub struct ModelOptions {
18
18
impl Default for ModelOptions {
19
19
fn default ( ) -> Self {
20
20
Self {
21
- context_size : 512 ,
21
+ context_size : 128 ,
22
22
seed : 0 ,
23
- f16_memory : false ,
23
+ f16_memory : true ,
24
24
m_lock : false ,
25
25
embeddings : false ,
26
26
low_vram : false ,
@@ -35,51 +35,51 @@ impl Default for ModelOptions {
35
35
}
36
36
}
37
37
38
/// Plain data holder for the settings of a single prediction call.
///
/// The struct and all of its fields are public. Floating-point settings are
/// stored as `f32`.
pub struct PredictOptions {
    /// Seed value.
    pub seed: i32,
    /// Thread count.
    pub threads: i32,
    /// Token count. NOTE(review): presumably the maximum number of tokens to
    /// generate — confirm against the consumer of this struct.
    pub tokens: i32,
    /// Top-k setting.
    pub top_k: i32,
    /// Repeat setting. NOTE(review): presumably the look-back window used by
    /// the repetition penalty — confirm.
    pub repeat: i32,
    /// Batch size.
    pub batch: i32,
    /// `n_keep` setting.
    pub n_keep: i32,
    /// Top-p setting.
    pub top_p: f32,
    /// Temperature setting.
    pub temperature: f32,
    /// Penalty setting.
    pub penalty: f32,
    /// Flag toggled by `enable_f16_kv`.
    pub f16_kv: bool,
    /// Debug-mode flag.
    pub debug_mode: bool,
    /// Stop prompts, stored as owned strings.
    pub stop_prompts: Vec<String>,
    /// Ignore-EOS flag.
    pub ignore_eos: bool,

    /// Tail-free sampling `z` value.
    pub tail_free_sampling_z: f32,
    /// Typical-p setting.
    pub typical_p: f32,
    /// Frequency penalty.
    pub frequency_penalty: f32,
    /// Presence penalty.
    pub presence_penalty: f32,
    /// Mirostat mode selector (integer).
    pub mirostat: i32,
    /// Mirostat `eta` value.
    pub mirostat_eta: f32,
    /// Mirostat `tau` value.
    pub mirostat_tau: f32,
    /// Penalize-newline flag.
    pub penalize_nl: bool,
    /// Logit bias, kept as an unparsed string.
    pub logit_bias: String,
    /// Boxed callback that takes an owned `String` and returns a `bool`.
    /// NOTE(review): presumably invoked per generated token, with the return
    /// value deciding whether generation continues — confirm against the caller.
    pub token_callback: Box<dyn Fn(String) -> bool>,

    /// Prompt-cache path, kept as a string.
    pub path_prompt_cache: String,
    /// `m_lock` flag.
    pub m_lock: bool,
    /// `m_map` flag.
    pub m_map: bool,
    /// Prompt-cache "all" flag.
    pub prompt_cache_all: bool,
    /// Prompt-cache read-only flag.
    pub prompt_cache_ro: bool,
    /// Main GPU selector, kept as a string.
    pub main_gpu: String,
    /// Tensor split specification, kept as a string.
    pub tensor_split: String,
}
73
73
74
74
impl Default for PredictOptions {
75
75
fn default ( ) -> Self {
76
76
Self {
77
77
seed : -1 ,
78
- threads : 4 ,
78
+ threads : 8 ,
79
79
tokens : 128 ,
80
80
top_k : 40 ,
81
81
repeat : 64 ,
82
- batch : 8 ,
82
+ batch : 512 ,
83
83
n_keep : 64 ,
84
84
top_p : 0.95 ,
85
85
temperature : 0.8 ,
@@ -110,169 +110,169 @@ impl Default for PredictOptions {
110
110
}
111
111
112
112
impl ModelOptions {
113
- fn set_context ( & mut self , context_size : i32 ) {
113
+ pub fn set_context ( & mut self , context_size : i32 ) {
114
114
self . context_size = context_size;
115
115
}
116
116
117
- fn set_model_seed ( & mut self , seed : i32 ) {
117
+ pub fn set_model_seed ( & mut self , seed : i32 ) {
118
118
self . seed = seed;
119
119
}
120
120
121
- fn enable_f16_memory ( & mut self ) {
121
+ pub fn enable_f16_memory ( & mut self ) {
122
122
self . f16_memory = true ;
123
123
}
124
124
125
- fn enable_embeddings ( & mut self ) {
125
+ pub fn enable_embeddings ( & mut self ) {
126
126
self . embeddings = true ;
127
127
}
128
128
129
- fn enable_m_lock ( & mut self ) {
129
+ pub fn enable_m_lock ( & mut self ) {
130
130
self . m_lock = true ;
131
131
}
132
132
133
- fn set_m_map ( & mut self , m_map : bool ) {
133
+ pub fn set_m_map ( & mut self , m_map : bool ) {
134
134
self . m_map = m_map;
135
135
}
136
136
137
- fn set_n_batch ( & mut self , n_batch : i32 ) {
137
+ pub fn set_n_batch ( & mut self , n_batch : i32 ) {
138
138
self . n_batch = n_batch;
139
139
}
140
140
141
- fn set_tensor_split ( & mut self , tensor_split : String ) {
141
+ pub fn set_tensor_split ( & mut self , tensor_split : String ) {
142
142
self . tensor_split = tensor_split;
143
143
}
144
144
145
- fn set_gpu_layers ( & mut self , n_gpu_layers : i32 ) {
145
+ pub fn set_gpu_layers ( & mut self , n_gpu_layers : i32 ) {
146
146
self . n_gpu_layers = n_gpu_layers;
147
147
}
148
148
149
- fn set_main_gpu ( & mut self , main_gpu : String ) {
149
+ pub fn set_main_gpu ( & mut self , main_gpu : String ) {
150
150
self . main_gpu = main_gpu;
151
151
}
152
152
}
153
153
154
154
impl PredictOptions {
155
- fn set_prediction_tensor_split ( & mut self , tensor_split : String ) {
155
+ pub fn set_prediction_tensor_split ( & mut self , tensor_split : String ) {
156
156
self . tensor_split = tensor_split;
157
157
}
158
158
159
- fn set_prediction_main_gpu ( & mut self , main_gpu : String ) {
159
+ pub fn set_prediction_main_gpu ( & mut self , main_gpu : String ) {
160
160
self . main_gpu = main_gpu;
161
161
}
162
162
163
- fn enable_f16_kv ( & mut self ) {
163
+ pub fn enable_f16_kv ( & mut self ) {
164
164
self . f16_kv = true ;
165
165
}
166
166
167
- fn enable_debug_mode ( & mut self ) {
167
+ pub fn enable_debug_mode ( & mut self ) {
168
168
self . debug_mode = true ;
169
169
}
170
170
171
- fn enable_prompt_cache_all ( & mut self ) {
171
+ pub fn enable_prompt_cache_all ( & mut self ) {
172
172
self . prompt_cache_all = true ;
173
173
}
174
174
175
- fn enable_prompt_cache_ro ( & mut self ) {
175
+ pub fn enable_prompt_cache_ro ( & mut self ) {
176
176
self . prompt_cache_ro = true ;
177
177
}
178
178
179
- fn enable_m_lock ( & mut self ) {
179
+ pub fn enable_m_lock ( & mut self ) {
180
180
self . m_lock = true ;
181
181
}
182
182
183
- fn set_m_lock ( & mut self , m_lock : bool ) {
183
+ pub fn set_m_lock ( & mut self , m_lock : bool ) {
184
184
self . m_lock = m_lock;
185
185
}
186
186
187
- fn set_memory_map ( & mut self , m_map : bool ) {
187
+ pub fn set_memory_map ( & mut self , m_map : bool ) {
188
188
self . m_map = m_map;
189
189
}
190
190
191
- fn set_token_callback ( & mut self , token_callback : Box < dyn Fn ( String ) -> bool > ) {
191
+ pub fn set_token_callback ( & mut self , token_callback : Box < dyn Fn ( String ) -> bool > ) {
192
192
self . token_callback = token_callback;
193
193
}
194
194
195
- fn set_path_prompt_cache ( & mut self , path_prompt_cache : String ) {
195
+ pub fn set_path_prompt_cache ( & mut self , path_prompt_cache : String ) {
196
196
self . path_prompt_cache = path_prompt_cache;
197
197
}
198
198
199
- fn set_seed ( & mut self , seed : i32 ) {
199
+ pub fn set_seed ( & mut self , seed : i32 ) {
200
200
self . seed = seed;
201
201
}
202
202
203
- fn set_threads ( & mut self , threads : i32 ) {
203
+ pub fn set_threads ( & mut self , threads : i32 ) {
204
204
self . threads = threads;
205
205
}
206
206
207
- fn set_tokens ( & mut self , tokens : i32 ) {
207
+ pub fn set_tokens ( & mut self , tokens : i32 ) {
208
208
self . tokens = tokens;
209
209
}
210
210
211
- fn set_top_k ( & mut self , top_k : i32 ) {
211
+ pub fn set_top_k ( & mut self , top_k : i32 ) {
212
212
self . top_k = top_k;
213
213
}
214
214
215
- fn set_repeat ( & mut self , repeat : i32 ) {
215
+ pub fn set_repeat ( & mut self , repeat : i32 ) {
216
216
self . repeat = repeat;
217
217
}
218
218
219
- fn set_batch ( & mut self , batch : i32 ) {
219
+ pub fn set_batch ( & mut self , batch : i32 ) {
220
220
self . batch = batch;
221
221
}
222
222
223
- fn set_n_keep ( & mut self , n_keep : i32 ) {
223
+ pub fn set_n_keep ( & mut self , n_keep : i32 ) {
224
224
self . n_keep = n_keep;
225
225
}
226
226
227
- fn set_top_p ( & mut self , top_p : f64 ) {
227
+ pub fn set_top_p ( & mut self , top_p : f32 ) {
228
228
self . top_p = top_p;
229
229
}
230
230
231
- fn set_temperature ( & mut self , temperature : f64 ) {
231
+ pub fn set_temperature ( & mut self , temperature : f32 ) {
232
232
self . temperature = temperature;
233
233
}
234
234
235
- fn set_penalty ( & mut self , penalty : f64 ) {
235
+ pub fn set_penalty ( & mut self , penalty : f32 ) {
236
236
self . penalty = penalty;
237
237
}
238
238
239
- fn set_tail_free_sampling_z ( & mut self , tail_free_sampling_z : f64 ) {
239
+ pub fn set_tail_free_sampling_z ( & mut self , tail_free_sampling_z : f32 ) {
240
240
self . tail_free_sampling_z = tail_free_sampling_z;
241
241
}
242
242
243
- fn set_typical_p ( & mut self , typical_p : f64 ) {
243
+ pub fn set_typical_p ( & mut self , typical_p : f32 ) {
244
244
self . typical_p = typical_p;
245
245
}
246
246
247
- fn set_frequency_penalty ( & mut self , frequency_penalty : f64 ) {
247
+ pub fn set_frequency_penalty ( & mut self , frequency_penalty : f32 ) {
248
248
self . frequency_penalty = frequency_penalty;
249
249
}
250
250
251
- fn set_presence_penalty ( & mut self , presence_penalty : f64 ) {
251
+ pub fn set_presence_penalty ( & mut self , presence_penalty : f32 ) {
252
252
self . presence_penalty = presence_penalty;
253
253
}
254
254
255
- fn set_mirostat ( & mut self , mirostat : i32 ) {
255
+ pub fn set_mirostat ( & mut self , mirostat : i32 ) {
256
256
self . mirostat = mirostat;
257
257
}
258
258
259
- fn set_mirostat_eta ( & mut self , mirostat_eta : f64 ) {
259
+ pub fn set_mirostat_eta ( & mut self , mirostat_eta : f32 ) {
260
260
self . mirostat_eta = mirostat_eta;
261
261
}
262
262
263
- fn set_mirostat_tau ( & mut self , mirostat_tau : f64 ) {
263
+ pub fn set_mirostat_tau ( & mut self , mirostat_tau : f32 ) {
264
264
self . mirostat_tau = mirostat_tau;
265
265
}
266
266
267
- fn enable_penalize_nl ( & mut self ) {
267
+ pub fn enable_penalize_nl ( & mut self ) {
268
268
self . penalize_nl = true ;
269
269
}
270
270
271
- fn set_logit_bias ( & mut self , logit_bias : String ) {
271
+ pub fn set_logit_bias ( & mut self , logit_bias : String ) {
272
272
self . logit_bias = logit_bias;
273
273
}
274
274
275
- fn ignore_eos ( & mut self ) {
275
+ pub fn ignore_eos ( & mut self ) {
276
276
self . ignore_eos = true ;
277
277
}
278
278
}
0 commit comments