Abaso007 · sourcery-ai · Dec 24, 2023 · sourcery-ai · Dec 24, 2023 · sourcery-ai
diff --git a/convert-baichuan-hf-to-gguf.py b/convert-baichuan-hf-to-gguf.py
@@ -45,14 +45,13 @@ def reverse_hf_part(weights: NDArray, n_part: int) -> NDArray:
         return weights[r * n_part : r * n_part + r, ...]
 
 def count_model_parts(dir_model: str) -> int:
-    num_parts = 0
-
-    for filename in os.listdir(dir_model):
-        if filename.startswith("pytorch_model-"):
-            num_parts += 1
-
+    num_parts = sum(
+        1
+        for filename in os.listdir(dir_model)
+        if filename.startswith("pytorch_model-")
+    )
     if num_parts > 0:
-        print("gguf: found " + str(num_parts) + " model parts")
+        print(f"gguf: found {num_parts} model parts")
 
     return num_parts
 

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
@@ -136,12 +136,9 @@ def write_vocab(self):
 
     @staticmethod
     def count_model_parts(dir_model: Path, prefix: str) -> int:
-        num_parts = 0
-        for filename in os.listdir(dir_model):
-            if filename.endswith(prefix):
-                num_parts += 1
-
-        return num_parts
+        return sum(
+            1 for filename in os.listdir(dir_model) if filename.endswith(prefix)
+        )
 
     @staticmethod
     def load_hparams(dir_model):
@@ -786,8 +783,8 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_embedding_length(hidden_size)
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
-        self.gguf_writer.add_head_count(head_count)
+        self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
+        self.gguf_writer.add_head_count(head_count)
         self.gguf_writer.add_head_count_kv(head_count_kv)
         self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
         self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])

diff --git a/convert-hf-to-powerinfer-gguf.py b/convert-hf-to-powerinfer-gguf.py
@@ -165,12 +165,9 @@ def write_vocab(self):
 
     @staticmethod
     def count_model_parts(dir_model: Path, prefix: str) -> int:
-        num_parts = 0
-        for filename in os.listdir(dir_model):
-            if filename.endswith(prefix):
-                num_parts += 1
-
-        return num_parts
+        return sum(
+            1 for filename in os.listdir(dir_model) if filename.endswith(prefix)
+        )
 
     @staticmethod
     def load_hparams(dir_model):
@@ -214,7 +211,7 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
         arch = self.hparams["architectures"][0]
         if arch == "FalconForCausalLM":
             return gguf.MODEL_ARCH.FALCON
-        if arch == "RWForCausalLM" or arch == "LlamaForCausalLM":
+        if arch in ["RWForCausalLM", "LlamaForCausalLM"]:
             return gguf.MODEL_ARCH.LLAMA
 
         raise NotImplementedError(f'Architecture "{arch}" not supported!')
@@ -230,9 +227,8 @@ def _translate_tensor_key(
         arch_tensor_key = tensor_map.get_name(key, try_suffixes=try_suffixes)
         if arch_tensor_key is not None:
             return arch_tensor_key
         # check and handle ReluMLP layers
-        mlp_match = re.match(r"^blk\.\d+\.fc\d\.weight$", key)
-        if mlp_match:
+        if mlp_match := re.match(r"^blk\.\d+\.fc\d\.weight$", key):
             return mlp_match.group(0)
         return None
 

diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
@@ -163,7 +163,7 @@ def validate_conversion(self, ftype):
             if ftype in ( GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
                           GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
                 err = 'Q4 and Q8 quantizations changed in GGJTv3.'
-        if len(err) > 0:
+        if err:
             raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
 
     def load(self, data, offset):
@@ -199,17 +199,16 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
         self.special_vocab = special_vocab
         if params_override is not None:
             n_kv_head = params_override.n_head_kv
+        elif cfg.gqa == 1:
+            n_kv_head = hp.n_head
         else:
-            if cfg.gqa == 1:
-                n_kv_head = hp.n_head
-            else:
-                gqa = float(cfg.gqa)
-                n_kv_head = None
-                for x in range(1, 256):
-                    if float(hp.n_head) / float(x) == gqa:
-                        n_kv_head = x
-                assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
-                print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
+            gqa = float(cfg.gqa)
+            n_kv_head = None
+            for x in range(1, 256):
+                if float(hp.n_head) / float(x) == gqa:
+                    n_kv_head = x
+            assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
+            print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
         self.n_kv_head = n_kv_head
         self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
 
@@ -281,15 +280,15 @@ def add_vocab(self, gguf_writer):
         if self.vocab_override is not None:
             vo = self.vocab_override
             print('* Adding vocab item(s)')
-            for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
+            for vbytes, score, ttype in vo.all_tokens():
                 tokens.append(vbytes)
                 scores.append(score)
                 toktypes.append(ttype)
             assert len(tokens) == hp.n_vocab, \
                 f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
             gguf_writer.add_token_list(tokens)
             gguf_writer.add_token_scores(scores)
-            if len(toktypes) > 0:
+            if toktypes:
                 gguf_writer.add_token_types(toktypes)
             return
         print(f'* Adding {hp.n_vocab} vocab item(s)')

diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py
@@ -28,8 +28,9 @@
 
 
 def translate_tensor_name(t: str) -> str:
-    match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t)
-    if match:
+    if match := re.match(
+        r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t
+    ):
         nn = match.group(1)
         sub_layer = match.group(2)
         lora_type = match.group(3)
@@ -39,10 +40,7 @@ def translate_tensor_name(t: str) -> str:
             print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}")
             sys.exit(1)
 
-        output_string = (
-            f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
-        )
-        return output_string
+        return f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
     else:
         print(f"Error: unrecognized tensor {t}")
         sys.exit(1)
@@ -122,7 +120,7 @@ def write_tensor_header(
         if k in ["llama_proj.weight", "llama_proj.bias"]:
             continue
         if k.endswith("lora_A.weight"):
-            if v.dtype != torch.float16 and v.dtype != torch.float32:
+            if v.dtype not in [torch.float16, torch.float32]:
                 v = v.float()
             v = v.T
         else:

diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py
@@ -12,7 +12,7 @@
 def _flatten_dict(dct, tensors, prefix=None):
     assert isinstance(dct, dict)
     for key in dct.keys():
-        new_prefix = prefix + '.' + key if prefix is not None else key
+        new_prefix = f'{prefix}.{key}' if prefix is not None else key
         if isinstance(dct[key], torch.Tensor):
             tensors[new_prefix] = dct[key]
         elif isinstance(dct[key], dict):
@@ -51,7 +51,6 @@ def _get_sentencepiece_tokenizer_info(dir_model: Path):
         tokens.append(text)
         scores.append(score)
         toktypes.append(toktype)
-        pass
     return tokens, scores, toktypes
 
 def main():
@@ -82,8 +81,8 @@ def main():
     gguf_writer.add_embedding_length(hidden_size)
     gguf_writer.add_block_count(block_count)
     gguf_writer.add_feed_forward_length(hparams.ffn_hidden_size)
-    gguf_writer.add_rope_dimension_count(hidden_size // head_count)
-    gguf_writer.add_head_count(head_count)
+    gguf_writer.add_rope_dimension_count(hidden_size // head_count)
+    gguf_writer.add_head_count(head_count)
     gguf_writer.add_head_count_kv(head_count_kv)
     gguf_writer.add_rope_freq_base(hparams.rotary_emb_base)
     gguf_writer.add_layer_norm_eps(hparams.layernorm_epsilon)
@@ -98,19 +97,18 @@ def main():
 
     tensor_map = gguf.get_tensor_name_map(arch, block_count)
     print(tensor_map)
-    for name in tensors.keys():
-        data = tensors[name]
+    for name, data in tensors.items():
         if name.endswith(".self_attention.rotary_emb.inv_freq"):
             continue
         old_dtype = data.dtype
         # TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
         data = data.to(torch.float32).squeeze().numpy()
         new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
         if new_name is None:
-            print("Can not map tensor '" + name + "'")
+            print(f"Can not map tensor '{name}'")
             sys.exit()
         n_dims = len(data.shape)
-        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
         gguf_writer.add_tensor(new_name, data)
     print("gguf: write header")
     gguf_writer.write_header_to_file()