Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
'Refactored by Sourcery'
  • Loading branch information
Sourcery AI committed Dec 24, 2023
commit b2d666f4d912bf3fe9190e40ee7ca87a9919a113
13 changes: 6 additions & 7 deletions convert-baichuan-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,13 @@ def reverse_hf_part(weights: NDArray, n_part: int) -> NDArray:
return weights[r * n_part : r * n_part + r, ...]

def count_model_parts(dir_model: str) -> int:
num_parts = 0

for filename in os.listdir(dir_model):
if filename.startswith("pytorch_model-"):
num_parts += 1

num_parts = sum(
1
for filename in os.listdir(dir_model)
if filename.startswith("pytorch_model-")
)
if num_parts > 0:
print("gguf: found " + str(num_parts) + " model parts")
print(f"gguf: found {str(num_parts)} model parts")
Comment on lines -48 to +54
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function count_model_parts refactored with the following changes:


return num_parts

Expand Down
13 changes: 5 additions & 8 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,9 @@ def write_vocab(self):

@staticmethod
def count_model_parts(dir_model: Path, prefix: str) -> int:
num_parts = 0
for filename in os.listdir(dir_model):
if filename.endswith(prefix):
num_parts += 1

return num_parts
return sum(
1 for filename in os.listdir(dir_model) if filename.endswith(prefix)
)
Comment on lines -139 to +141
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Model.count_model_parts refactored with the following changes:


@staticmethod
def load_hparams(dir_model):
Expand Down Expand Up @@ -786,8 +783,8 @@ def set_gguf_parameters(self):
self.gguf_writer.add_embedding_length(hidden_size)
self.gguf_writer.add_block_count(block_count)
self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
self.gguf_writer.add_head_count(head_count)
self.gguf_writer.add_rope_dimension_count(hidden_size // head_count_kv)
self.gguf_writer.add_head_count(head_count_kv)
Comment on lines -789 to +787
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function PersimmonModel.set_gguf_parameters refactored with the following changes:

self.gguf_writer.add_head_count_kv(head_count_kv)
self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
Expand Down
15 changes: 5 additions & 10 deletions convert-hf-to-powerinfer-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,9 @@ def write_vocab(self):

@staticmethod
def count_model_parts(dir_model: Path, prefix: str) -> int:
num_parts = 0
for filename in os.listdir(dir_model):
if filename.endswith(prefix):
num_parts += 1

return num_parts
return sum(
1 for filename in os.listdir(dir_model) if filename.endswith(prefix)
)
Comment on lines -168 to +170
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Model.count_model_parts refactored with the following changes:


@staticmethod
def load_hparams(dir_model):
Expand Down Expand Up @@ -214,7 +211,7 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
arch = self.hparams["architectures"][0]
if arch == "FalconForCausalLM":
return gguf.MODEL_ARCH.FALCON
if arch == "RWForCausalLM" or arch == "LlamaForCausalLM":
if arch in ["RWForCausalLM", "LlamaForCausalLM"]:
Comment on lines -217 to +214
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Model._get_model_architecture refactored with the following changes:

return gguf.MODEL_ARCH.LLAMA

raise NotImplementedError(f'Architecture "{arch}" not supported!')
Expand All @@ -230,9 +227,7 @@ def _translate_tensor_key(
arch_tensor_key = tensor_map.get_name(key, try_suffixes=try_suffixes)
if arch_tensor_key is not None:
return arch_tensor_key
# check and handle ReluMLP layers
mlp_match = re.match(r"^blk\.\d+\.fc\d\.weight$", key)
if mlp_match:
if mlp_match := re.match(r"^blk\.\d+\.fc\d\.weight$", key):
Comment on lines -233 to +230
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Model._translate_tensor_key refactored with the following changes:

This removes the following comments (why?):

# check and handle ReluMLP layers

return mlp_match.group(0)
return None

Expand Down
27 changes: 13 additions & 14 deletions convert-llama-ggml-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def validate_conversion(self, ftype):
if ftype in ( GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
err = 'Q4 and Q8 quantizations changed in GGJTv3.'
if len(err) > 0:
if err != "":
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function GGMLModel.validate_conversion refactored with the following changes:

raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')

def load(self, data, offset):
Expand Down Expand Up @@ -199,17 +199,16 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
self.special_vocab = special_vocab
if params_override is not None:
n_kv_head = params_override.n_head_kv
elif cfg.gqa == 1:
n_kv_head = hp.n_head
else:
if cfg.gqa == 1:
n_kv_head = hp.n_head
else:
gqa = float(cfg.gqa)
n_kv_head = None
for x in range(1, 256):
if float(hp.n_head) / float(x) == gqa:
n_kv_head = x
assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
gqa = float(cfg.gqa)
n_kv_head = None
for x in range(1, 256):
if float(hp.n_head) / float(x) == gqa:
n_kv_head = x
assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
Comment on lines +202 to +211
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function GGMLToGGUF.__init__ refactored with the following changes:

self.n_kv_head = n_kv_head
self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)

Expand Down Expand Up @@ -281,15 +280,15 @@ def add_vocab(self, gguf_writer):
if self.vocab_override is not None:
vo = self.vocab_override
print('* Adding vocab item(s)')
for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
for vbytes, score, ttype in vo.all_tokens():
tokens.append(vbytes)
scores.append(score)
toktypes.append(ttype)
assert len(tokens) == hp.n_vocab, \
f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
if len(toktypes) > 0:
if toktypes:
Comment on lines -284 to +291
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function GGMLToGGUF.add_vocab refactored with the following changes:

gguf_writer.add_token_types(toktypes)
return
print(f'* Adding {hp.n_vocab} vocab item(s)')
Expand Down
12 changes: 5 additions & 7 deletions convert-lora-to-ggml.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@


def translate_tensor_name(t: str) -> str:
match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t)
if match:
if match := re.match(
r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t
):
Comment on lines -31 to +33
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function translate_tensor_name refactored with the following changes:

nn = match.group(1)
sub_layer = match.group(2)
lora_type = match.group(3)
Expand All @@ -39,10 +40,7 @@ def translate_tensor_name(t: str) -> str:
print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}")
sys.exit(1)

output_string = (
f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
)
return output_string
return f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
else:
print(f"Error: unrecognized tensor {t}")
sys.exit(1)
Expand Down Expand Up @@ -122,7 +120,7 @@ def write_tensor_header(
if k in ["llama_proj.weight", "llama_proj.bias"]:
continue
if k.endswith("lora_A.weight"):
if v.dtype != torch.float16 and v.dtype != torch.float32:
if v.dtype not in [torch.float16, torch.float32]:
Comment on lines -125 to +123
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 125 refactored with the following changes:

v = v.float()
v = v.T
else:
Expand Down
14 changes: 6 additions & 8 deletions convert-persimmon-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
def _flatten_dict(dct, tensors, prefix=None):
assert isinstance(dct, dict)
for key in dct.keys():
new_prefix = prefix + '.' + key if prefix is not None else key
new_prefix = f'{prefix}.{key}' if prefix is not None else key
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function _flatten_dict refactored with the following changes:

if isinstance(dct[key], torch.Tensor):
tensors[new_prefix] = dct[key]
elif isinstance(dct[key], dict):
Expand Down Expand Up @@ -51,7 +51,6 @@ def _get_sentencepiece_tokenizer_info(dir_model: Path):
tokens.append(text)
scores.append(score)
toktypes.append(toktype)
pass
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function _get_sentencepiece_tokenizer_info refactored with the following changes:

return tokens, scores, toktypes

def main():
Expand Down Expand Up @@ -82,8 +81,8 @@ def main():
gguf_writer.add_embedding_length(hidden_size)
gguf_writer.add_block_count(block_count)
gguf_writer.add_feed_forward_length(hparams.ffn_hidden_size)
gguf_writer.add_rope_dimension_count(hidden_size // head_count)
gguf_writer.add_head_count(head_count)
gguf_writer.add_rope_dimension_count(hidden_size // head_count_kv)
gguf_writer.add_head_count(head_count_kv)
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:

gguf_writer.add_head_count_kv(head_count_kv)
gguf_writer.add_rope_freq_base(hparams.rotary_emb_base)
gguf_writer.add_layer_norm_eps(hparams.layernorm_epsilon)
Expand All @@ -98,19 +97,18 @@ def main():

tensor_map = gguf.get_tensor_name_map(arch, block_count)
print(tensor_map)
for name in tensors.keys():
data = tensors[name]
for name, data in tensors.items():
if name.endswith(".self_attention.rotary_emb.inv_freq"):
continue
old_dtype = data.dtype
# TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
data = data.to(torch.float32).squeeze().numpy()
new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
if new_name is None:
print("Can not map tensor '" + name + "'")
print(f"Can not map tensor '{name}'")
sys.exit()
n_dims = len(data.shape)
print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
print(f"{new_name}, n_dims = {n_dims}, {str(old_dtype)} --> {str(data.dtype)}")
gguf_writer.add_tensor(new_name, data)
print("gguf: write header")
gguf_writer.write_header_to_file()
Expand Down
Loading