73 commits
874c341
support splits in convert.py
christianazinn Apr 27, 2024
72cbd4e
Support split by size and dry run to write estimated shards/filesizes
christianazinn Apr 28, 2024
702a744
Move split functionality to new GGUFManager class
christianazinn Apr 28, 2024
c33bdf3
fix improper function signature
christianazinn Apr 29, 2024
b7c6120
tentative push of convert-hf-to-gguf support
christianazinn May 5, 2024
14b3291
Merge branch 'master' into convert-split
mofosyne May 9, 2024
87a98a5
resolve merge + SplitArguments for easier parsing
christianazinn May 10, 2024
2dd7841
Merge remote-tracking branch 'origin' into convert-split
christianazinn May 23, 2024
3ff27ef
Fix eager tensor memory leak and remove convert.py changes
christianazinn May 23, 2024
6b5c375
refactor SplitStrategy to be a deque
christianazinn May 24, 2024
09baf2f
fix Q8 quantization
christianazinn Jun 3, 2024
240243e
remove unnecessary imports in gguf_manager
christianazinn Jun 3, 2024
140eb52
Merge branch 'master' into convert-split
christianazinn Jun 3, 2024
a9c7703
fix final? merge issue
christianazinn Jun 3, 2024
efead04
fix gguf_writer placement and remove comments
christianazinn Jun 3, 2024
c8ecbc6
oops, actually fix gguf_writer placement
christianazinn Jun 3, 2024
3e9430d
reduce duplicated code from gguf_writer
christianazinn Jun 5, 2024
f6fd3ea
further simplify GGUFManager
christianazinn Jun 5, 2024
bb5ee02
simplify even further and standardize with GGUFWriter
christianazinn Jun 5, 2024
5ad397d
reduce diffs with master
christianazinn Jun 5, 2024
ce7e698
form shards while adding tensors, SHA256 sums agree with master
christianazinn Jun 5, 2024
706bd69
re-add type hint
christianazinn Jun 6, 2024
6a05183
GGUFWriter compatibility fix
christianazinn Jun 6, 2024
3328b0a
Shard dataclass and un-negative dont_add_architecture
christianazinn Jun 6, 2024
1cbab22
type consistency in format_n_bytes_to_str
christianazinn Jun 6, 2024
2037eab
move kv keys to constants.py
christianazinn Jun 6, 2024
83e4a3f
make pathlib explicit
christianazinn Jun 6, 2024
13ffe22
base-1024 bytes to base-1000
christianazinn Jun 6, 2024
6d3a256
rename GGUFManager to GGUFWriterSplit
christianazinn Jun 7, 2024
1312e28
Update gguf-py/gguf/constants.py
christianazinn Jun 7, 2024
5f29d4a
fix convert-hf-to-gguf.py permissions
christianazinn Jun 7, 2024
0283fc1
fix line endings
christianazinn Jun 7, 2024
dc5cf5f
Update gguf-py/gguf/gguf_writer_split.py
christianazinn Jun 7, 2024
e093dfb
convert-hf : restore executable file permission
compilade Jun 7, 2024
9576965
examples/convert-legacy-llama.py: restore executable file permission
christianazinn Jun 8, 2024
c6ae1d6
reinstate original gguf package import and fix type annotation
christianazinn Jun 8, 2024
2e70fa1
attempt to appease the linter
christianazinn Jun 8, 2024
891b19c
attempt 2 to appease the linter
christianazinn Jun 8, 2024
02be0dd
attempt 3 to appease the linter
christianazinn Jun 8, 2024
f658e91
comma consistency
christianazinn Jun 8, 2024
079dfe3
Update convert-hf-to-gguf.py
christianazinn Jun 8, 2024
282e71f
edit cmd line args
christianazinn Jun 9, 2024
666bb09
Merge branch 'master' into convert-split
christianazinn Jun 9, 2024
03cc9bc
use simplification from #7827
christianazinn Jun 9, 2024
97dd416
kv/ti data are still wrong
christianazinn Jun 9, 2024
ff2dd7d
try to refactor kv data (still fails)
christianazinn Jun 9, 2024
ba1be97
fix ti data messiness
christianazinn Jun 9, 2024
69d6e7a
Merge branch 'master' into convert-split
christianazinn Jun 9, 2024
0779f2f
tidy up
christianazinn Jun 9, 2024
a234bf8
fix linting
christianazinn Jun 9, 2024
49b9fbe
actually make the linter happy
christianazinn Jun 9, 2024
0471f67
cleanup round 1
christianazinn Jun 9, 2024
5a96b8f
remove SplitStrategy, SplitArguments
christianazinn Jun 9, 2024
f7ecd99
appease linter
christianazinn Jun 9, 2024
9d7f694
fix typing and clean up
christianazinn Jun 9, 2024
0417104
fix linting
christianazinn Jun 9, 2024
70a6bc9
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 9, 2024
1e2d9cb
progress bar, fix split logic
christianazinn Jun 9, 2024
f7e7983
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
79bd2bf
catch oversights
christianazinn Jun 10, 2024
7eea552
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
99f9a24
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
ad02c94
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
c1b1a29
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
4550826
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
efa0609
swap bar orders
christianazinn Jun 10, 2024
b843445
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
854bd64
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
05b183f
compatibility fix
christianazinn Jun 10, 2024
e9895d2
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
4e4e376
Merge branch 'master' into convert-split
christianazinn Jun 15, 2024
163712e
Update convert-hf-to-gguf.py
mofosyne Jun 23, 2024
6e4182c
Merge branch 'master' into convert-split
christianazinn Jun 24, 2024
further simplify GGUFManager
christianazinn committed Jun 5, 2024
commit f6fd3ea4e9a0a68dadbbd3956778672b7735e2d5
convert-hf-to-gguf-update.py (7 changes: 0 additions & 7 deletions)
@@ -81,14 +81,7 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
{"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
{"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
<<<<<<< Updated upstream
{"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
=======
{"name": "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom-7b1", },
{"name": "gptbigcode", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/gpt_bigcode-santacoder", },
{"name": "phi2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
{"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B-Chat", },
>>>>>>> Stashed changes
]


convert-hf-to-gguf.py (12 changes: 8 additions & 4 deletions)
@@ -60,7 +60,7 @@ class Model:
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     fname_out: Path
-    gguf_writer: gguf.GGUFManager
+    gguf_writer: gguf.GGUFWriter

     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
@@ -329,11 +329,16 @@ def write_tensors(self):

     def write(self):
         self.write_tensors()
-        self.gguf_writer.write_to_file()
+        self.gguf_writer.write_header_to_file()
+        self.gguf_writer.write_kv_data_to_file()
+        self.gguf_writer.write_ti_data_to_file()
         self.gguf_writer.close()

     def write_vocab(self):
-        self.gguf_writer.write_to_file(meta_only=True)
+        if self.gguf_writer.split_arguments.split:
+            raise ValueError('Splitting the vocabulary is not supported')
+        self.gguf_writer.write_header_to_file()
+        self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()

     @staticmethod
@@ -1563,7 +1568,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

         return [(self.map_tensor_name(name), data_torch)]

-# TODO what the hell is this?
 @Model.register("QWenLMHeadModel")
 class QwenModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN
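To make the new flow concrete, here is a minimal sketch of a caller driving the phased write API from this commit. The KV key, tensor name, and tensor data are illustrative placeholders, not values from this PR:

```python
import numpy as np

import gguf


def write_model(writer: gguf.GGUFManager) -> None:
    # Both KV data and tensors are buffered in memory until the write phases run.
    writer.add_uint32("example.context_length", 4096)
    writer.add_tensor("tok_embd.weight", np.zeros((4, 4), dtype=np.float32))

    writer.write_header_to_file()   # plans shards from the buffered tensors
    writer.write_kv_data_to_file()  # full KV data to shard 0, split keys to every shard
    writer.write_ti_data_to_file()  # tensor info and tensor data, shard by shard
    writer.close()
```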
gguf-py/gguf/gguf_manager.py (80 changes: 43 additions & 37 deletions)
@@ -2,8 +2,7 @@

import os
from enum import IntEnum
from typing import TYPE_CHECKING, Any, Sequence, Mapping
from string import ascii_letters, digits
from typing import TYPE_CHECKING, Any, Sequence
from argparse import Namespace
from math import ceil
from collections import deque
@@ -18,7 +17,7 @@
GGUFEndian,
GGUFValueType
)
from .gguf_writer import GGUFWriter
from .gguf_writer import GGUFWriter, WriterState


SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
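The shard naming scheme is just the format string above. For a hypothetical model written to llama-7b in five shards:

```python
SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"

# Third shard of five:
print(SHARD_NAME_FORMAT.format("llama-7b", 3, 5))
# llama-7b-00003-of-00005.gguf
```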
@@ -74,7 +73,7 @@ def __init__(self, fname_out: os.PathLike[str], model: list[TensorTempData], arc
self.append((shard, model[start:stop], GGUFWriter(shard, arch, use_temp_file=use_temp_file, endianess=endianess)))

elif split_arguments.split_style == SplitStyle.SIZE:
shards = deque()
shards = []

# we have to determine the shards first to determine how many shards there will be in total - two passes
for i, shard in enumerate(model):
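Independent of this file's exact code, the two-pass idea works like this: a first pass groups tensors into shards under a byte budget, and only then is the total shard count known, so the second pass can name files with the -of-NNNNN suffix. A rough sketch, assuming (name, n_bytes) pairs as the input shape:

```python
def plan_shards(tensors: list[tuple[str, int]], max_bytes: int) -> list[list[str]]:
    # Pass 1: greedily pack tensors into shards no larger than max_bytes each.
    shards: list[list[str]] = []
    current: list[str] = []
    current_size = 0
    for name, size in tensors:
        if current and current_size + size > max_bytes:
            shards.append(current)
            current, current_size = [], 0
        current.append(name)
        current_size += size
    if current:
        shards.append(current)
    return shards  # len(shards) feeds the -of-NNNNN part of SHARD_NAME_FORMAT
```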
@@ -135,7 +134,6 @@ def format_n_bytes_to_str(num: int) -> str:
num /= 1024.0
return f"{num:.1f}T - over 1TB, --split recommended"

# TODO fall back to normal GGUFWriter in convert-hf-to-gguf.py if no --split
class GGUFManager(GGUFWriter):
kv_data: KVTempData
tensors: list[TensorTempData]
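Only the tail of format_n_bytes_to_str is visible in the hunk above; its overall shape is a standard base-1024 unit loop. A reconstruction under that assumption (the exact unit list is guessed):

```python
def format_n_bytes_to_str(num: int) -> str:
    # Render a byte count for display, e.g. 1536 -> "1.5K".
    num = float(num)
    for unit in ("", "K", "M", "G"):
        if abs(num) < 1024.0:
            return f"{num:3.1f}{unit}"
        num /= 1024.0
    return f"{num:.1f}T - over 1TB, --split recommended"
```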
@@ -145,27 +143,25 @@ class GGUFManager(GGUFWriter):
def __init__(self, path: os.PathLike[str] | str, arch: str, split_arguments: SplitArguments,
use_temp_file: bool = True, endianess: GGUFEndian = GGUFEndian.LITTLE
) -> None:
# TODO be able to use superclass constructor
# super().__init__(path, arch, use_temp_file=use_temp_file, endianess=endianess)
# we intentionally don't call superclass constructor
self.arch = arch
self.path = path
self.endianess = endianess
self.offset_tensor = 0
self.kv_data = {}
self.tensors = []
# TODO how many of these do you need
self.split_strategy = None
self.total_shards = None
self.total_tensors = None
self.total_shards = 0
self.total_tensors = 0
self.use_temp_file = use_temp_file
self.split_arguments = split_arguments
self.recent_key = None
self.state = WriterState.EMPTY
self.add_architecture()

# TODO split back into write_header_to_file, write_kv_data_to_file, write_ti_data_to_file
def write_to_file(self, meta_only: bool = False) -> None:
def write_header_to_file(self) -> None:
if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected GGUFManager state to be EMPTY, got {self.state}')

# here is the first place you can assume you have all tensors written and you can establish the size of the file - so logic goes here
self.total_tensors = len(self.tensors)
total_size = sum(SplitStrategy.get_tensor_size(tensor[1]) for tensor in self.tensors)

@@ -182,42 +178,50 @@ def write_to_file(self, meta_only: bool = False) -> None:
del self.tensors
self.total_shards = len(self.split_strategy)

print("\nWriting the following files:")
for (shard_path, shard_tensors, _) in self.split_strategy:
size = SplitStrategy.format_n_bytes_to_str(sum(SplitStrategy.get_tensor_size(t[1]) for t in shard_tensors)) if shard_tensors else "negligible - metadata only"
print(f" {shard_path}: n_tensors = {len(shard_tensors) if shard_tensors else 0}, total_size = {size}")

if self.split_arguments.dry_run:
print("\nDry run, not writing files")
# instantiating GGUFWriters creates files
for name, _, _ in self.split_strategy:
os.remove(name)
return

self.state = WriterState.HEADER

def write_kv_data_to_file(self) -> None:
if self.split_arguments.dry_run:
return

if self.state is not WriterState.HEADER:
raise ValueError(f'Expected GGUFManager state to be HEADER, got {self.state}')

# only the first shard needs all the KV data
for key, (value, etype) in self.kv_data.items():
self.split_strategy[0][2].add_key(key)
self.split_strategy[0][2].add_val(value, etype)

# the other shards need shard data
if self.split_arguments.split_style != SplitStyle.NONE:
for i, (_, _, writer) in enumerate(self.split_strategy):
writer.add_uint16(LLM_KV_SPLIT_NO, i)
writer.add_uint16(LLM_KV_SPLIT_COUNT, self.total_shards)
writer.add_int32(LLM_KV_SPLIT_TENSORS_COUNT, self.total_tensors)

# metadata/vocab only can write and return here
if meta_only:
for i, (_, _, writer) in enumerate(self.split_strategy):
writer.write_header_to_file()
writer.write_kv_data_to_file()
return

# tensor writing code starts here

print("\nWriting the following files:")
for (shard_path, shard_tensors, _) in self.split_strategy:
size = SplitStrategy.format_n_bytes_to_str(sum(SplitStrategy.get_tensor_size(t[1]) for t in shard_tensors)) if shard_tensors else "negligible - metadata only"
print(f" {shard_path}: n_tensors = {len(shard_tensors) if shard_tensors else 0}, total_size = {size}")
self.state = WriterState.KV_DATA

def write_ti_data_to_file(self) -> None:
if self.split_arguments.dry_run:
print("\nDry run, not writing files")
# instantiating GGUFWriters creates files
for name, _, _ in self.split_strategy:
os.remove(name)
return

# run add_tensor_info, write data, then write_tensor_data - taken from convert.py
if self.state is not WriterState.KV_DATA:
raise ValueError(f'Expected GGUFManager state to be KV_DATA, got {self.state}')

running_total = self.total_tensors
ct = 0
while True:
for ct in range(self.total_shards):
try:
(_, tensors, writer) = self.split_strategy.popleft()
tensors = deque(tensors) if tensors else None
@@ -234,15 +238,17 @@ def write_to_file(self, meta_only: bool = False) -> None:
break
writer.add_tensor(name, tensor, raw_dtype=dtype)

print(f"Writing to shard {ct + 1}/{self.total_shards} with {shard_num_tensors}/{running_total} remaining tensors (of {self.total_tensors} total)")
running_total -= shard_num_tensors
print(f"Writing to shard {ct}/{self.total_shards} with {shard_num_tensors}/{running_total} remaining tensors (of {self.total_tensors} total)")
running_total -= shard_num_tensors

# need to write everything down here
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file(progress=True)
ct = ct + 1
del tensors

self.state = WriterState.TI_DATA

# override add_key, add_val to handle kv data separately
def add_key(self, key: str) -> None:
self.recent_key = key
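Downstream, each shard carries the split bookkeeping written above. A sketch of how a consumer could read it back with gguf-py's GGUFReader; the shard path is hypothetical, and the key strings assume LLM_KV_SPLIT_NO, LLM_KV_SPLIT_COUNT, and LLM_KV_SPLIT_TENSORS_COUNT map to "split.no", "split.count", and "split.tensors.count":

```python
from gguf import GGUFReader

reader = GGUFReader("model-00001-of-00003.gguf")
for key in ("split.no", "split.count", "split.tensors.count"):
    field = reader.get_field(key)
    if field is not None:
        # Scalar fields store a single value; data indexes into parts.
        print(key, field.parts[field.data[0]][0])
```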