Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5fa47bf
ggml : remove Q4_0 bit shuffling (ARM NEON)
ggerganov May 3, 2023
844d2af
ggml : remove Q4_1 bit shuffling (ARM NEON + reference)
ggerganov May 4, 2023
fd2a137
ggml : nibbles_from_floats() + bytes_from_nibbles() (ARM NEON)
ggerganov May 4, 2023
9f3285f
ggml : remove Q4_2 bit shuffling (WIP, BROKEN)
ggerganov May 4, 2023
aa78dfe
ggml : remove Q5_0 bit shuffling (ARM NEON)
ggerganov May 4, 2023
b37a08f
ggml : 2x faster scalar implementations
ggerganov May 4, 2023
292a778
ggml : remove Q5_1 bit shuffling (ARM NEON + scalar)
ggerganov May 5, 2023
caaacd5
ggml : simplify scalar dot
ggerganov May 5, 2023
0add640
ggml : remove WASM SIMD bit shuffling + remove vzip for ARM 32-bit
ggerganov May 5, 2023
9472d0e
ggml : fix Q4_1 quantization
ggerganov May 7, 2023
cdc9607
ggml : update cuBLAS + normalize variable names
ggerganov May 7, 2023
4bf1c8a
ggml : remove Q4_2 mode
ggerganov May 7, 2023
b08c39b
ggml : minor formatting
ggerganov May 7, 2023
8367455
ggml : fix Q5_0 quantization
ggerganov May 7, 2023
928d2f3
scripts : add script for measuring the time per token
ggerganov May 8, 2023
9e49d20
AVX implementations (#1370)
sw May 8, 2023
489bd13
ggml : uniform 5th bit extraction
ggerganov May 8, 2023
d52172a
llama : produce error upon loading old model files
ggerganov May 9, 2023
09032e0
llama : fix model magic/version write
ggerganov May 9, 2023
b7ad385
ggml : speed-up Q5_0 + Q5_1 at 4 threads
ggerganov May 10, 2023
695f396
ggml : preserve old Q4 and Q5 formats
ggerganov May 11, 2023
582a39f
ggml : simplify Q8_1 - no need for low / high sums anymore
ggerganov May 11, 2023
6680244
ggml : fix Q8_0 and Q8_1 rounding
ggerganov May 11, 2023
bd5e373
Revert "AVX implementations (#1370)"
ggerganov May 11, 2023
5bc286a
ggml : fix AVX2 implementation
ggerganov May 11, 2023
e038e01
sha : update hashes for 7B and 13B
ggerganov May 11, 2023
51c25fd
readme : update timings + remove warning banner
ggerganov May 11, 2023
1c87847
llama : update v2 PR number to 1405
ggerganov May 11, 2023
832c53f
ggml : fix WASM comments
ggerganov May 11, 2023
ca7f069
ggml : back to original bit order
ggerganov May 11, 2023
b58b1f4
readme : add note that Q4 and Q5 have been changed
ggerganov May 11, 2023
cbb6a3a
llama : fix return for unknown version
ggerganov May 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
llama : update v2 PR number to 1405
  • Loading branch information
ggerganov committed May 11, 2023
commit 1c87847b6bf10cf4ecc1d6b4b96f9d8b9449820c
2 changes: 1 addition & 1 deletion llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ static const char *llama_file_version_name(llama_file_version version) {
switch (version) {
case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
-case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (pre #1305)";
+case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (latest)";
}
}
Expand Down