Merged

Commits (73)
874c341
support splits in convert.py
christianazinn Apr 27, 2024
72cbd4e
Support split by size and dry run to write estimated shards/filesizes
christianazinn Apr 28, 2024
702a744
Move split functionality to new GGUFManager class
christianazinn Apr 28, 2024
c33bdf3
fix improper function signature
christianazinn Apr 29, 2024
b7c6120
tentative push of convert-hf-to-gguf support
christianazinn May 5, 2024
14b3291
Merge branch 'master' into convert-split
mofosyne May 9, 2024
87a98a5
resolve merge + SplitArguments for easier parsing
christianazinn May 10, 2024
2dd7841
Merge remote-tracking branch 'origin' into convert-split
christianazinn May 23, 2024
3ff27ef
Fix eager tensor memory leak and remove convert.py changes
christianazinn May 23, 2024
6b5c375
refactor SplitStrategy to be a deque
christianazinn May 24, 2024
09baf2f
fix Q8 quantization
christianazinn Jun 3, 2024
240243e
remove unnecessary imports in gguf_manager
christianazinn Jun 3, 2024
140eb52
Merge branch 'master' into convert-split
christianazinn Jun 3, 2024
a9c7703
fix final? merge issue
christianazinn Jun 3, 2024
efead04
fix gguf_writer placement and remove comments
christianazinn Jun 3, 2024
c8ecbc6
oops, actually fix gguf_writer placement
christianazinn Jun 3, 2024
3e9430d
reduce duplicated code from gguf_writer
christianazinn Jun 5, 2024
f6fd3ea
further simplify GGUFManager
christianazinn Jun 5, 2024
bb5ee02
simplify even further and standardize with GGUFWriter
christianazinn Jun 5, 2024
5ad397d
reduce diffs with master
christianazinn Jun 5, 2024
ce7e698
form shards while adding tensors, SHA256 sums agree with master
christianazinn Jun 5, 2024
706bd69
re-add type hint
christianazinn Jun 6, 2024
6a05183
GGUFWriter compatibility fix
christianazinn Jun 6, 2024
3328b0a
Shard dataclass and un-negative dont_add_architecture
christianazinn Jun 6, 2024
1cbab22
type consistency in format_n_bytes_to_str
christianazinn Jun 6, 2024
2037eab
move kv keys to constants.py
christianazinn Jun 6, 2024
83e4a3f
make pathlib explicit
christianazinn Jun 6, 2024
13ffe22
base-1024 bytes to base-1000
christianazinn Jun 6, 2024
6d3a256
rename GGUFManager to GGUFWriterSplit
christianazinn Jun 7, 2024
1312e28
Update gguf-py/gguf/constants.py
christianazinn Jun 7, 2024
5f29d4a
fix convert-hf-to-gguf.py permissions
christianazinn Jun 7, 2024
0283fc1
fix line endings
christianazinn Jun 7, 2024
dc5cf5f
Update gguf-py/gguf/gguf_writer_split.py
christianazinn Jun 7, 2024
e093dfb
convert-hf : restore executable file permission
compilade Jun 7, 2024
9576965
examples/convert-legacy-llama.py: restore executable file permission
christianazinn Jun 8, 2024
c6ae1d6
reinstate original gguf package import and fix type annotation
christianazinn Jun 8, 2024
2e70fa1
attempt to appease the linter
christianazinn Jun 8, 2024
891b19c
attempt 2 to appease the linter
christianazinn Jun 8, 2024
02be0dd
attempt 3 to appease the linter
christianazinn Jun 8, 2024
f658e91
comma consistency
christianazinn Jun 8, 2024
079dfe3
Update convert-hf-to-gguf.py
christianazinn Jun 8, 2024
282e71f
edit cmd line args
christianazinn Jun 9, 2024
666bb09
Merge branch 'master' into convert-split
christianazinn Jun 9, 2024
03cc9bc
use simplification from #7827
christianazinn Jun 9, 2024
97dd416
kv/ti data are still wrong
christianazinn Jun 9, 2024
ff2dd7d
try to refactor kv data (still fails)
christianazinn Jun 9, 2024
ba1be97
fix ti data messiness
christianazinn Jun 9, 2024
69d6e7a
Merge branch 'master' into convert-split
christianazinn Jun 9, 2024
0779f2f
tidy up
christianazinn Jun 9, 2024
a234bf8
fix linting
christianazinn Jun 9, 2024
49b9fbe
actually make the linter happy
christianazinn Jun 9, 2024
0471f67
cleanup round 1
christianazinn Jun 9, 2024
5a96b8f
remove SplitStrategy, SplitArguments
christianazinn Jun 9, 2024
f7ecd99
appease linter
christianazinn Jun 9, 2024
9d7f694
fix typing and clean up
christianazinn Jun 9, 2024
0417104
fix linting
christianazinn Jun 9, 2024
70a6bc9
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 9, 2024
1e2d9cb
progress bar, fix split logic
christianazinn Jun 9, 2024
f7e7983
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
79bd2bf
catch oversights
christianazinn Jun 10, 2024
7eea552
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
99f9a24
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
ad02c94
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
c1b1a29
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
4550826
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
efa0609
swap bar orders
christianazinn Jun 10, 2024
b843445
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
854bd64
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
05b183f
compatibility fix
christianazinn Jun 10, 2024
e9895d2
Update gguf-py/gguf/gguf_writer.py
christianazinn Jun 10, 2024
4e4e376
Merge branch 'master' into convert-split
christianazinn Jun 15, 2024
163712e
Update convert-hf-to-gguf.py
mofosyne Jun 23, 2024
6e4182c
Merge branch 'master' into convert-split
christianazinn Jun 24, 2024
fix typing and clean up
christianazinn committed Jun 9, 2024
commit 9d7f694438f347d0bd1f79044ef6fa565caeb236
6 changes: 5 additions & 1 deletion convert-hf-to-gguf.py
@@ -66,7 +66,7 @@ class Model:
model_arch: gguf.MODEL_ARCH

def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = 0, small_first_shard: bool = 0):
model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
if type(self) is Model:
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
self.dir_model = dir_model
@@ -2875,6 +2875,10 @@ def main() -> None:
"auto": gguf.LlamaFileType.GUESSED,
}

if args.use_temp_file and (args.split_max_tensors > 0 or args.split_max_size != "0"):
logger.error("Error: Cannot use temp file when splitting")
sys.exit(1)

if args.outfile is not None:
fname_out = args.outfile
else:
95 changes: 34 additions & 61 deletions gguf-py/gguf/gguf_writer.py
Collaborator
> Some of them are related to things a static type checker could point out.
>
> That seems to have been a running theme. I really ought to get one. It should be much better now.

It's better. Down to 6 errors.

I might as well include them all here so that you know where to look (click to expand)

Most of them can be fixed with assert self.fout is not None in the right places.

$ pyright gguf-py/gguf/gguf_writer.py 
./gguf-py/gguf/gguf_writer.py
  ./gguf-py/gguf/gguf_writer.py:123:24 - error: Argument of type "PathLike[str] | str | None" cannot be assigned to parameter "args" of type "StrPath" in function "__new__" (reportArgumentType)
  ./gguf-py/gguf/gguf_writer.py:165:28 - error: Argument of type "list[BufferedWriter] | None" cannot be assigned to parameter "obj" of type "Sized" in function "len"
    Type "list[BufferedWriter] | None" is incompatible with type "Sized"
      "None" is incompatible with protocol "Sized"
        "__len__" is not present (reportArgumentType)
  ./gguf-py/gguf/gguf_writer.py:172:76 - error: Argument of type "list[BufferedWriter] | None" cannot be assigned to parameter "obj" of type "Sized" in function "len"
    Type "list[BufferedWriter] | None" is incompatible with type "Sized"
      "None" is incompatible with protocol "Sized"
        "__len__" is not present (reportArgumentType)
  ./gguf-py/gguf/gguf_writer.py:183:20 - error: Argument of type "list[BufferedWriter] | None" cannot be assigned to parameter "obj" of type "Sized" in function "len"
    Type "list[BufferedWriter] | None" is incompatible with type "Sized"
      "None" is incompatible with protocol "Sized"
        "__len__" is not present (reportArgumentType)
  ./gguf-py/gguf/gguf_writer.py:188:43 - error: Argument of type "list[BufferedWriter] | None" cannot be assigned to parameter "iter1" of type "Iterable[_T1@__new__]" in function "__new__"
    Type "list[BufferedWriter] | None" is incompatible with type "Iterable[BufferedWriter]"
      "None" is incompatible with protocol "Iterable[BufferedWriter]"
        "__iter__" is not present (reportArgumentType)
  ./gguf-py/gguf/gguf_writer.py:412:48 - error: Argument of type "list[BufferedWriter]" cannot be assigned to parameter "fdst" of type "SupportsWrite[AnyStr@copyfileobj]" in function "copyfileobj"
    "list[BufferedWriter]" is incompatible with protocol "SupportsWrite[bytes]"
      "write" is not present (reportArgumentType)
6 errors, 0 warnings, 0 informations
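As a minimal sketch of the assert-based fix suggested above (illustrative only; `WriterSketch` is a made-up stand-in for `GGUFWriter`, not code from the PR): pyright narrows `list[BufferedWriter] | None` to `list[BufferedWriter]` after an explicit `assert`, which silences the `Sized`/`Iterable` errors in the log.

```python
from __future__ import annotations

from io import BufferedWriter


class WriterSketch:
    # same Optional annotation as GGUFWriter.fout
    fout: list[BufferedWriter] | None

    def __init__(self) -> None:
        self.fout = None

    def flush(self) -> None:
        # without this assert, pyright reports errors like those above:
        # "None" is incompatible with protocol "Sized" / "Iterable[BufferedWriter]"
        assert self.fout is not None
        for fout in self.fout:
            fout.flush()
```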

Contributor Author
It looks like the last one deals with temp_file, which there doesn't seem to be a good way to handle splitting for. For now I'm just going to prevent using temp file with splits.

Collaborator
> For now I'm just going to prevent using temp file with splits.

I'm fine with that.

It might still be possible to allow temp_file by adding one tempfile per shard, though this might use too much memory, since the temp files are spooled in memory (256MiB by default).

https://github.com/ggerganov/llama.cpp/blob/0417104397e54dce71cd2c6e9c23b11f2acf0d60/gguf-py/gguf/gguf_writer.py#L320

So I think preventing temp files when splitting like you did is the right choice.
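For reference, a rough sketch of that hypothetical per-shard variant (not implemented in the PR; `n_shards` is illustrative), which makes the memory concern concrete: each spooled file buffers up to `max_size` bytes in RAM before rolling over to disk.

```python
import tempfile

n_shards = 3  # illustrative; in GGUFWriter this would correspond to len(self.tensors)

# one spooled temp file per shard; with the 256 MiB default,
# n shards could hold up to n * 256 MiB in memory at once
temp_files = [
    tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
    for _ in range(n_shards)
]
```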

@@ -9,7 +9,7 @@
from enum import Enum, auto
from pathlib import Path
from io import BufferedWriter
from typing import IO, Any, Sequence, Mapping, TypeAlias
from typing import IO, Any, Sequence, Mapping
from string import ascii_letters, digits

import numpy as np
@@ -33,11 +33,6 @@


SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
NUM_SHARD_KV_DATA = 3
METADATA_ONLY_INDICATOR = -1

KVTempData: TypeAlias = dict[str, tuple[Any, GGUFValueType | None]] # {key: (value, type)}
TensorTempData: TypeAlias = tuple[str, np.ndarray[Any, Any], GGMLQuantizationType | None] # (tensor name, tensor data, tensor dtype)


@dataclass
@@ -65,7 +60,7 @@ class WriterState(Enum):

class GGUFWriter:
fout: list[BufferedWriter] | None
path: os.PathLike[str] | str | None
path: Path | None
temp_file: tempfile.SpooledTemporaryFile[bytes] | None
tensors: list[dict[str, TensorInfo]]
kv_data: list[dict[str, GGUFValue]]
@@ -88,15 +83,15 @@ def __init__(
self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
):
self.fout = []
self.path = path
self.fout = None
self.path = Path(path) if path else None
self.arch = arch
self.endianess = endianess
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
self.use_temp_file = use_temp_file
self.temp_file = None
self.tensors = []
self.kv_data = [dict()]
self.tensors = [{}]
self.kv_data = [{}]
self.split_max_tensors = split_max_tensors
self.split_max_size = split_max_size
self.dry_run = dry_run
@@ -107,30 +102,16 @@ def __init__(
self.state = WriterState.NO_FILE

if self.small_first_shard:
self.tensors.append(dict())
self.tensors.append({})

self.add_architecture()

def verify_arguments(self) -> None:
if len(self.tensors) == 1:
logger.warning("Model fails split requirements, not splitting")

# no shards are created when writing vocab so make one
if not self.tensors or len(self.tensors) == 0:
self.tensors = [dict()]

def format_shard_names(self, path: os.PathLike[str] | str | None = None) -> list[os.PathLike[str]]:
pathobj = Path(path)
def format_shard_names(self, path: Path) -> list[Path]:
if len(self.tensors) == 1:
return [pathobj]

shard_names = []
for i in range(len(self.tensors)):
shard_names.append(pathobj.with_name(SHARD_NAME_FORMAT.format(pathobj.stem, i + 1, len(self.tensors))))

return shard_names
return [path]
return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))]

def open_output_file(self, path: os.PathLike[str] | str | None = None) -> None:
def open_output_file(self, path: Path | None = None) -> None:
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
# allow calling this multiple times as long as the path is the same
return
@@ -141,14 +122,14 @@ def open_output_file(self, path: os.PathLike[str] | str | None = None) -> None:
self.path = path

if self.path is not None:
self.fout = []
for fout in self.format_shard_names(self.path):
self.fout.append(open(fout, "wb"))
self.print_plan()
self.fout = [open(filename, "wb") for filename in self.format_shard_names(self.path)]
self.state = WriterState.EMPTY

def print_plan(self, path: os.PathLike[str] | str | None = None) -> None:
def print_plan(self) -> None:
logger.info("Writing the following files:")
filenames = self.format_shard_names(path)
assert self.path is not None
filenames = self.format_shard_names(self.path)
assert len(filenames) == len(self.tensors)
for name, tensors in zip(filenames, self.tensors):
logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")
@@ -162,24 +143,28 @@ def add_shard_kv_data(self) -> None:
return

total_tensors = sum(len(t) for t in self.tensors)
for i in range(len(self.fout)):
assert self.fout is not None
total_splits = len(self.fout)
for i in range(total_splits):
# just see whether it exists
try:
self.kv_data[i]
except IndexError:
self.kv_data.append(dict())
self.kv_data[i][Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
self.kv_data[i][Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(len(self.fout), GGUFValueType.UINT16)
self.kv_data[i][Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16)
self.kv_data[i][Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32)

def write_header_to_file(self, path: os.PathLike[str] | str | None = None) -> None:
self.verify_arguments()
self.print_plan(path)
def write_header_to_file(self, path: Path | None = None) -> None:
if len(self.tensors) == 1:
logger.warning("Model fails split requirements, not splitting")

self.open_output_file(path)

if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected output file to be empty, got {self.state}')

assert self.fout is not None
assert len(self.fout) == len(self.tensors)
assert len(self.kv_data) == 1

@@ -216,7 +201,6 @@ def write_ti_data_to_file(self) -> None:
assert self.fout is not None

for fout, tensors in zip(self.fout, self.tensors):
assert fout is not None
ti_data = bytearray()
offset_tensor = 0

@@ -235,7 +219,7 @@ def write_ti_data_to_file(self) -> None:
self.state = WriterState.TI_DATA

def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
if key in self.kv_data:
if any(key in kv_data for kv_data in self.kv_data):
raise ValueError(f'Duplicated key name {key!r}')

self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
@@ -279,9 +263,6 @@ def add_string(self, key: str, val: str) -> None:
self.add_key_value(key, val, GGUFValueType.STRING)

def add_array(self, key: str, val: Sequence[Any]) -> None:
if not isinstance(val, Sequence):
raise ValueError("Value must be a sequence for array type")

self.add_key_value(key, val, GGUFValueType.ARRAY)

@staticmethod
@@ -295,9 +276,8 @@ def add_tensor_info(
if self.state is not WriterState.NO_FILE:
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

for tensors in self.tensors:
if name in tensors:
raise ValueError(f'Duplicated tensor name {name!r}')
if any(name in tensors for tensors in self.tensors):
raise ValueError(f'Duplicated tensor name {name!r}')

if raw_dtype is None:
if tensor_dtype == np.float16:
@@ -321,10 +301,8 @@
if tensor_dtype == np.uint8:
tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)

# create splits as necessary, such as to start it off
if (len(self.tensors) == self.small_first_shard \
# or split when over tensor limit
or self.split_max_tensors != 0 and \
# split when over tensor limit
if (self.split_max_tensors != 0 and \
len(self.tensors[-1]) >= self.split_max_tensors \
# or split when over size limit
or self.split_max_size != 0 and \
@@ -369,7 +347,6 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
tensor.byteswap(inplace=True)

for fout in self.fout:
assert fout is not None
self.write_padding(fout, fout.tell())
tensor.tofile(fout)
self.write_padding(fout, tensor.nbytes)
@@ -382,12 +359,10 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
assert self.fout is not None

for fout in self.fout:
assert fout is not None
self.write_padding(fout, fout.tell())

if self.temp_file is None:
for fout, tensors in zip(self.fout, self.tensors):
assert fout is not None
bar = None

if progress:
@@ -409,7 +384,8 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
else:
self.temp_file.seek(0)

shutil.copyfileobj(self.temp_file, self.fout)
assert self.fout is not None
shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1])
self.flush()
self.temp_file.close()

@@ -418,14 +394,12 @@ def write_tensors_to_file(self, *, progress: bool = False) -> None:
def flush(self) -> None:
assert self.fout is not None
for fout in self.fout:
assert fout is not None
fout.flush()

def close(self) -> None:
if self.fout is not None:
for fout in self.fout:
if fout is not None:
fout.close()
fout.close()
self.fout = []

def add_architecture(self) -> None:
@@ -705,12 +679,11 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
return kv_data

def _write_packed(self, fout: BufferedWriter, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
assert fout is not None
fout.write(self._pack(fmt, value, skip_pack_prefix))

@staticmethod
def format_n_bytes_to_str(num: int) -> str:
if num == METADATA_ONLY_INDICATOR:
if num == 0:
return "negligible - metadata only"
fnum = float(num)
for unit in ("", "K", "M", "G"):
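As a quick illustration of the shard naming used by `format_shard_names` in the diff above (example values only, not from the PR): `SHARD_NAME_FORMAT` zero-pads the shard index and total count to five digits.

```python
SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"

# e.g. a file "model.gguf" (stem "model") split into 3 shards:
print(SHARD_NAME_FORMAT.format("model", 1, 3))  # model-00001-of-00003.gguf
print(SHARD_NAME_FORMAT.format("model", 3, 3))  # model-00003-of-00003.gguf
```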