GGUF #2398
Merged
253 commits
6873148 gguf : first API pass (ggerganov)
8d6acfe gguf : read header + meta data (ggerganov)
d91b985 gguf : read tensor info (ggerganov)
78b226a gguf : initial model loading - not tested (ggerganov)
860c9c6 gguf : add gguf_get_tensor_name() (ggerganov)
cb871fa gguf : do not support passing existing ggml_context to gguf_init (ggerganov)
d313c0f gguf : simplify gguf_get_val (ggerganov)
e46870f gguf : gguf.c is now part of ggml.c (ggerganov)
5628ec7 gguf : read / write sample models (ggerganov)
d8491fc gguf : add comments (ggerganov)
c85d317 refactor : reduce code duplication and better API (#2415) (monatis)
d89533d gguf : expose the gguf_type enum through the API for now (ggerganov)
d2b6ca1 gguf : add array support (ggerganov)
158be8f gguf.py : some code style changes (ggerganov)
68f5348 convert.py : start a new simplified implementation by removing old stuff (ggerganov)
d2bb3ac convert.py : remove GGML vocab + other obsolete stuff (ggerganov)
11ef380 GGUF : write tensor (#2426) (monatis)
3492f84 gguf : add gguf_find_key (#2438) (klosax)
1495735 gguf : fix writing tensors (monatis)
9475cdb Merge branch 'gguf-write-tokenization' into gguf (monatis)
08dc8fd gguf : do not hardcode tensor names to read (monatis)
06f423a gguf : write sample tensors to read (monatis)
d54f53c gguf : add tokenization constants (monatis)
999431c quick and dirty conversion example (klosax)
ea5f9ad gguf : fix writing gguf arrays (monatis)
aa99562 Merge branch 'gguf' of https://github.com//ggerganov/llama.cpp into gguf (monatis)
93f7f7a gguf : write tensors one by one and code reuse (monatis)
0c219fb gguf : fix writing gguf arrays (monatis)
c861e23 gguf : write tensors one by one (monatis)
8a76dd8 gguf : write tensors one by one (monatis)
cc3dd7f gguf : write tokenizer data (monatis)
0317c41 gguf : upd gguf conversion script (monatis)
8ad7cd4 Update convert-llama-h5-to-gguf.py (klosax)
0f5e57f gguf : handle already encoded string (monatis)
34469b9 ggml.h : get array str and f32 (klosax)
2c22e3b ggml.c : get arr str and f32 (klosax)
9577821 gguf.py : support any type (klosax)
06c3e4a Update convert-llama-h5-to-gguf.py (klosax)
32e037f gguf : fix set is not subscriptable (monatis)
87c34e4 gguf : update convert-llama-h5-to-gguf.py (monatis)
0790c12 constants.py : add layer norm eps (klosax)
ccd81a7 gguf.py : add layer norm eps and merges (klosax)
b4676ee ggml.h : increase GGML_MAX_NAME to 64 (klosax)
b19c117 ggml.c : add gguf_get_arr_n (klosax)
4ed98bf Update convert-llama-h5-to-gguf.py (klosax)
e9192b0 add gptneox gguf example (klosax)
f175b05 Makefile : add gptneox gguf example (klosax)
2fabc17 Update convert-llama-h5-to-gguf.py (klosax)
30c4ea4 add gptneox gguf example (klosax)
068a8e0 Update convert-llama-h5-to-gguf.py (klosax)
2a09146 Update convert-gptneox-h5-to-gguf.py (klosax)
4f5b622 Update convert-gptneox-h5-to-gguf.py (klosax)
6b3a7b9 Update convert-llama-h5-to-gguf.py (klosax)
7aa0a0e gguf : support custom alignment value (monatis)
b26f5b2 gguf : fix typo in function call (monatis)
bb42aef gguf : mmap tensor data example (monatis)
f3de876 fix : update convert-llama-h5-to-gguf.py (monatis)
da4900e Update convert-llama-h5-to-gguf.py (klosax)
e7a7416 convert-gptneox-h5-to-gguf.py : Special tokens (klosax)
c77fabb gptneox-main.cpp : special tokens (klosax)
36a36c3 Update gptneox-main.cpp (klosax)
ff1cb02 constants.py : special tokens (klosax)
49380a2 gguf.py : accumulate kv and tensor info data + special tokens (klosax)
1b4f9c8 convert-gptneox-h5-to-gguf.py : accumulate kv and ti + special tokens (klosax)
cf365fb gguf : gguf counterpart of llama-util.h (monatis)
c3a65c4 gguf-util.h : update note (monatis)
e1e9b28 convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens (klosax)
c5ba5ef convert-llama-h5-to-gguf.py : special tokens (klosax)
23abbe8 Delete gptneox-common.cpp (klosax)
6691aa8 Delete gptneox-common.h (klosax)
2922280 convert-gptneox-h5-to-gguf.py : gpt2bpe tokenizer (klosax)
e6f19ba gptneox-main.cpp : gpt2 bpe tokenizer (klosax)
5d98989 gpt2 bpe tokenizer (handles merges and unicode) (klosax)
fb0b243 Makefile : remove gptneox-common (klosax)
278ada9 gguf.py : bytesarray for gpt2bpe tokenizer (klosax)
db5618a cmpnct_gpt2bpe.hpp : comments (klosax)
4357e69 gguf.py : use custom alignment if present (klosax)
1da82c5 Merge branch 'master' into gguf (ggerganov)
8083ae3 gguf : minor stuff (ggerganov)
65559a2 Update gptneox-main.cpp (klosax)
ece4fc1 map tensor names (klosax)
f4d137d convert-gptneox-h5-to-gguf.py : map tensor names (klosax)
7d5f452 convert-llama-h5-to-gguf.py : map tensor names (klosax)
0246d0d gptneox-main.cpp : map tensor names (klosax)
1c4d8bf gguf : start implementing libllama in GGUF (WIP) (monatis)
4f86518 gguf : start implementing libllama in GGUF (WIP) (monatis)
4c0f64e rm binary commited by mistake (monatis)
22de6c5 upd .gitignore (monatis)
42cc04d gguf : calculate n_mult (monatis)
cfb8e35 gguf : inference with 7B model working (WIP) (monatis)
f316b94 gguf : rm deprecated function (monatis)
e7d346c gguf : start implementing gguf_file_saver (WIP) (monatis)
a356b0e gguf : start implementing gguf_file_saver (WIP) (monatis)
b2440f1 gguf : start implementing gguf_file_saver (WIP) (monatis)
eb8ca69 gguf : add gguf_get_kv_type (monatis)
e3a4960 gguf : add gguf_get_kv_type (monatis)
28abfc9 gguf : write metadata in gguf_file_saver (WIP) (monatis)
781b9ec gguf : write metadata in gguf_file_saver (WIP) (monatis)
d09fd10 gguf : write metadata in gguf_file_saver (monatis)
61919c1 gguf : rm references to old file formats (monatis)
7009cf5 gguf : shorter name for member variable (monatis)
f44bbd3 gguf : rm redundant method (monatis)
e732423 gguf : get rid of n_mult, read n_ff from file (monatis)
2a5ac7a Update gguf_tensor_map.py (klosax)
e76c59d Update gptneox-main.cpp (klosax)
2f52008 gguf : rm references to old file magics (monatis)
186c496 Merge branch 'gguf' of https://github.com//ggerganov/llama.cpp into gguf (monatis)
4fa017a gguf : start implementing quantization (WIP) (monatis)
0e1a3c7 gguf : start implementing quantization (WIP) (monatis)
c4f02b4 gguf : start implementing quantization (WIP) (monatis)
b2571af gguf : start implementing quantization (WIP) (monatis)
fa7c395 gguf : start implementing quantization (WIP) (monatis)
1fc3d30 gguf : start implementing quantization (WIP) (monatis)
202eab0 gguf : quantization is working (monatis)
60d5408 gguf : roper closing of file (monatis)
5d81a71 gguf.py : no need to convert tensors twice (klosax)
8f09157 convert-gptneox-h5-to-gguf.py : no need to convert tensors twice (klosax)
4cef57c convert-llama-h5-to-gguf.py : no need to convert tensors twice (klosax)
f821847 convert-gptneox-h5-to-gguf.py : simplify nbytes (klosax)
e606ffe convert-llama-h5-to-gguf.py : simplify nbytes (klosax)
5e58ffa gptneox-main.cpp : n_layer --> n_block (klosax)
8b5f0c5 constants.py : n_layer --> n_block (klosax)
d2ce9cf gguf.py : n_layer --> n_block (klosax)
489616e convert-gptneox-h5-to-gguf.py : n_layer --> n_block (klosax)
e91a222 convert-llama-h5-to-gguf.py : n_layer --> n_block (klosax)
c7bd8c1 gptneox-main.cpp : n_layer --> n_block (klosax)
9bf5a7e Update gguf_tensor_map.py (klosax)
e3d1f07 convert-gptneox-h5-to-gguf.py : load model in parts to save memory (klosax)
17800cd convert-llama-h5-to-gguf.py : load model in parts to save memory (klosax)
91d4bfd convert : write more metadata for LLaMA (monatis)
1d60468 fix conflicts (monatis)
bf2dad3 convert : rm quantization version (monatis)
2827b84 convert-gptneox-h5-to-gguf.py : add file_type key (klosax)
6beebf3 gptneox-main.cpp : add file_type key (klosax)
24f4883 fix conflicts (monatis)
196b50f gguf : add todos and comments (monatis)
56a1f32 Merge branch 'master' into gguf (ggerganov)
5d22a9d convert-gptneox-h5-to-gguf.py : tensor name map changes (klosax)
51939d7 Create gguf_namemap.py : tensor name map changes (klosax)
806a157 Delete gguf_tensor_map.py (klosax)
d753dfb gptneox-main.cpp : tensor name map changes (klosax)
a7d226f convert-llama-h5-to-gguf.py : fixes (klosax)
5c5a95b gguf.py : dont add empty strings (klosax)
0c19ae7 simple : minor style changes (ggerganov)
62490f1 gguf : use UNIX line ending (ggerganov)
6f64b6c Create convert-llama-7b-pth-to-gguf.py (klosax)
f00780b llama : sync gguf-llama.cpp with latest llama.cpp (#2608) (ggerganov)
6f14854 gitignore : add gptneox-main (ggerganov)
8af3a99 Merge branch 'master' into gguf (ggerganov)
ec1b100 llama : tokenizer fixes (#2549) (goerch)
afc4ca2 convert : update convert-new.py with tokenizer fixes (#2614) (goerch)
7494c78 llama : sync gguf-llama with llama (#2613) (ggerganov)
6c63550 llama : update tokenizer style (ggerganov)
7ec125b convert-llama-h5-to-gguf.py : add token types (klosax)
5d518d4 constants.py : add token types (klosax)
cedb487 gguf.py : add token types (klosax)
ab2cbd0 convert-llama-7b-pth-to-gguf.py : add token types (klosax)
ca47582 gguf-llama.cpp : fix n_head_kv (klosax)
2dd5d2c convert-llama-h5-to-gguf.py : add 70b gqa support (klosax)
b6056c3 gguf.py : add tensor data layout (klosax)
66756c8 convert-llama-h5-to-gguf.py : add tensor data layout (klosax)
2ae0e98 convert-llama-7b-pth-to-gguf.py : add tensor data layout (klosax)
4a1741a gptneox-main.cpp : add tensor data layout (klosax)
ea5615a convert-llama-h5-to-gguf.py : clarify the reverse permute (klosax)
758ff1b llama : refactor model loading code (#2620) (ggerganov)
88b5769 gguf : deduplicate (#2629) (ggerganov)
c8ee87f gguf.py : merge all files in gguf.py (ggerganov)
5ec1893 convert-new.py : pick #2427 for HF 70B support (ggerganov)
42f8fe1 examples/gguf : no need to keep q option for quantization any more (monatis)
5a0a2c5 llama.cpp : print actual model size (klosax)
d6fd53a llama.cpp : use ggml_elements() (klosax)
e0429d3 convert-new.py : output gguf (#2635) (ggerganov)
2ddd968 convert.py : update to support GGUF output (ggerganov)
dd016cc Revert "ci : disable CI temporary to not waste energy" (ggerganov)
d646c4e convert.py : n_head_kv optional and .gguf file extension (klosax)
8ace03a convert.py : better always have n_head_kv and default it to n_head (ggerganov)
11bf436 llama : sync with recent PRs on master (ggerganov)
6d66ef9 Merge branch 'master' into gguf (ggerganov)
c3b7393 editorconfig : ignore models folder (ggerganov)
dd9e2fc ci : update ".bin" to ".gguf" extension (ggerganov)
81a2c2a llama : fix llama_model_loader memory leak (ggerganov)
93f285b gptneox : move as a WIP example (ggerganov)
899f9a5 llama : fix lambda capture (ggerganov)
e72c8c2 ggml : fix bug in gguf_set_kv (ggerganov)
fb11dd3 common.h : .bin --> .gguf (klosax)
78e1e57 quantize-stats.cpp : .bin --> .gguf (klosax)
acaa982 convert.py : fix HF tensor permuting / unpacking (ggerganov)
b3cc182 llama.cpp : typo (klosax)
57eaadb llama : throw error if gguf fails to init from file (ggerganov)
5484737 llama : fix tensor name grepping during quantization (ggerganov)
fc3a523 gguf.py : write tensors in a single pass (#2644) (monatis)
b668cd3 convert-gptneox-hf-to-gguf.py : fixes (klosax)
640ddc4 gguf.py : gptneox mapping (klosax)
9e2d4dd convert-llama-hf-to-gguf.py : fixes (klosax)
3c1b721 convert-llama-7b-pth-to-gguf.py : fixes (klosax)
c20ae49 ggml.h : reverse GGUF_MAGIC (klosax)
147a99b gguf.py : reverse GGUF_MAGIC (klosax)
d9e6890 test-tokenizer-0.cpp : fix warning (klosax)
306070c llama.cpp : print kv general.name (klosax)
b275de7 llama.cpp : get special token kv and linefeed token id (klosax)
aa3efe8 llama : print number of tensors per type + print arch + style (ggerganov)
856afff Merge branch 'master' into gguf (ggerganov)
e35f8c7 tests : update vocab file with new magic (ggerganov)
dea5be6 editorconfig : fix whitespaces (ggerganov)
660ca9b llama : re-order functions (ggerganov)
38016ed Merge branch 'master' into gguf (ggerganov)
2d6c2c7 llama : remove C++ API + reorganize common source in /common dir (ggerganov)
035d511 llama : minor API updates (ggerganov)
5d2656d llama : avoid hardcoded special tokens (ggerganov)
a4ad2bf llama : fix MPI build (ggerganov)
25b8a89 llama : introduce enum llama_vocab_type + remove hardcoded string con… (ggerganov)
fb7c883 convert-falcon-hf-to-gguf.py : falcon HF --> gguf conversion, not tested (klosax)
d5e976c falcon-main.cpp : falcon inference example (klosax)
16ab9ba convert-falcon-hf-to-gguf.py : remove extra kv (klosax)
c0e4ca6 convert-gptneox-hf-to-gguf.py : remove extra kv (klosax)
593b04f convert-llama-7b-pth-to-gguf.py : remove extra kv (klosax)
281d6d1 convert-llama-hf-to-gguf.py : remove extra kv (klosax)
bd5a579 gguf.py : fix for falcon 40b (klosax)
1d80eea falcon-main.cpp : fix for falcon 40b (klosax)
2c8055b convert-falcon-hf-to-gguf.py : update ref (klosax)
b3a7a2b convert-falcon-hf-to-gguf.py : add tensor data layout (klosax)
dadf098 cmpnct_gpt2bpe.hpp : fixes (klosax)
781bf24 falcon-main.cpp : fixes (klosax)
8945d47 gptneox-main.cpp : fixes (klosax)
6a2e520 cmpnct_gpt2bpe.hpp : remove non-general stuff (klosax)
c0a1269 Update examples/server/README.md (klosax)
28b8c26 cmpnct_gpt2bpe.hpp : cleanup (klosax)
76b4662 convert-llama-hf-to-gguf.py : special tokens (klosax)
f838faa convert-llama-7b-pth-to-gguf.py : special tokens (klosax)
5a02b96 convert-permute-debug.py : permute debug print (klosax)
4f92488 convert-permute-debug-master.py : permute debug for master (klosax)
7de7cb4 convert-permute-debug.py : change permute type of attn_q (klosax)
d5c8fcf convert.py : 70b model working (change attn_q permute) (klosax)
287db51 Delete convert-permute-debug-master.py (klosax)
58bde5c Delete convert-permute-debug.py (klosax)
c818c40 convert-llama-hf-to-gguf.py : fix attn_q permute (klosax)
6a69a69 gguf.py : fix rope scale kv (klosax)
5f6ff38 convert-llama-hf-to-gguf.py : rope scale and added tokens (klosax)
dc1f051 convert-llama-7b-pth-to-gguf.py : rope scale and added tokens (klosax)
c082b9f llama.cpp : use rope scale kv (klosax)
9070e33 convert-llama-7b-pth-to-gguf.py : rope scale fix (klosax)
7a7d1ba convert-llama-hf-to-gguf.py : rope scale fix (klosax)
1e7a009 Merge branch 'master' into gguf (ggerganov)
6490ff7 py : fix whitespace (ggerganov)
e06cbce gguf : add Python script to convert GGMLv3 LLaMA models to GGUF (#2682) (KerfuffleV2)
8d177ed llama : improve token type support (#2668) (goerch)
0b53b8b llama : add API for token type (ggerganov)
49c25cc tests : use new tokenizer type API (#2692) (goerch)
811f653 py : cosmetics (ggerganov)
66a66a0 readme : add notice about new file format (ggerganov)
Changes shown are from a single commit, 1fc3d30b71a707187eb1f995c4776db7aaa6265a: gguf : start implementing quantization (WIP)
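For reference, the commits above build up a small C API in ggml for reading GGUF files (gguf_init_from_file, gguf_get_tensor_name, gguf_find_key, gguf_get_kv_type, and so on). Below is a minimal sketch of how that reader API can be used to dump a file's metadata; it is assembled from the function names mentioned in the commit messages, and exact signatures and struct fields (e.g. the no_alloc field of gguf_init_params) changed while the PR evolved, so treat it as an illustration rather than the final API.

```c
// Minimal sketch: enumerate GGUF key-value metadata and tensor names
// using the reader API introduced in this PR. Function names come from
// the commit messages above; signatures are assumptions and may differ
// from the merged version.
#include <stdio.h>
#include <stdbool.h>

#include "ggml.h" // gguf.c was folded into ggml.c in commit e46870f

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    // no_alloc = true: read only the header and metadata,
    // do not allocate or load tensor data
    struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };

    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (!ctx) {
        fprintf(stderr, "failed to init gguf from %s\n", argv[1]);
        return 1;
    }

    // key-value metadata
    const int n_kv = gguf_get_n_kv(ctx);
    for (int i = 0; i < n_kv; ++i) {
        printf("kv %3d: %s\n", i, gguf_get_key(ctx, i));
    }

    // tensor info
    const int n_tensors = gguf_get_n_tensors(ctx);
    for (int i = 0; i < n_tensors; ++i) {
        printf("tensor %3d: %s\n", i, gguf_get_tensor_name(ctx, i));
    }

    // look up a specific key, e.g. general.name
    // (printed by llama.cpp as of commit 306070c)
    const int idx = gguf_find_key(ctx, "general.name");
    if (idx >= 0) {
        printf("found general.name at kv index %d\n", idx);
    }

    gguf_free(ctx);
    return 0;
}
```

Since gguf.c became part of ggml.c (commit e46870f), a sketch like this would be compiled against ggml itself, e.g. `cc dump.c ggml.c -lm -lpthread -o dump`.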