Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5fa47bf
ggml : remove Q4_0 bit shuffling (ARM NEON)
ggerganov May 3, 2023
844d2af
ggml : remove Q4_1 bit shuffling (ARM NEON + reference)
ggerganov May 4, 2023
fd2a137
ggml : nibbles_from_floats() + bytes_from_nibbles() (ARM NEON)
ggerganov May 4, 2023
9f3285f
ggml : remove Q4_2 bit shuffling (WIP, BROKEN)
ggerganov May 4, 2023
aa78dfe
ggml : remove Q5_0 bit shuffling (ARM NEON)
ggerganov May 4, 2023
b37a08f
ggml : 2x faster scalar implementations
ggerganov May 4, 2023
292a778
ggml : remove Q5_1 bit shuffling (ARM NEON + scalar)
ggerganov May 5, 2023
caaacd5
ggml : simplify scalar dot
ggerganov May 5, 2023
0add640
ggml : remove WASM SIMD bit shuffling + remove vzip for ARM 32-bit
ggerganov May 5, 2023
9472d0e
ggml : fix Q4_1 quantization
ggerganov May 7, 2023
cdc9607
ggml : update cuBLAS + normalize variable names
ggerganov May 7, 2023
4bf1c8a
ggml : remove Q4_2 mode
ggerganov May 7, 2023
b08c39b
ggml : minor formatting
ggerganov May 7, 2023
8367455
ggml : fix Q5_0 quantization
ggerganov May 7, 2023
928d2f3
scripts : add script for measuring the time per token
ggerganov May 8, 2023
9e49d20
AVX implementations (#1370)
sw May 8, 2023
489bd13
ggml : uniform 5th bit extraction
ggerganov May 8, 2023
d52172a
llama : produce error upon loading old model files
ggerganov May 9, 2023
09032e0
llama : fix model magic/version write
ggerganov May 9, 2023
b7ad385
ggml : speed-up Q5_0 + Q5_1 at 4 threads
ggerganov May 10, 2023
695f396
ggml : preserve old Q4 and Q5 formats
ggerganov May 11, 2023
582a39f
ggml : simplify Q8_1 - no need for low / high sums anymore
ggerganov May 11, 2023
6680244
ggml : fix Q8_0 and Q8_1 rounding
ggerganov May 11, 2023
bd5e373
Revert "AVX implementations (#1370)"
ggerganov May 11, 2023
5bc286a
ggml : fix AVX2 implementation
ggerganov May 11, 2023
e038e01
sha : update hashes for 7B and 13B
ggerganov May 11, 2023
51c25fd
readme : update timings + remove warning banner
ggerganov May 11, 2023
1c87847
llama : update v2 PR number to 1405
ggerganov May 11, 2023
832c53f
ggml : fix WASM comments
ggerganov May 11, 2023
ca7f069
ggml : back to original bit order
ggerganov May 11, 2023
b58b1f4
readme : add note that Q4 and Q5 have been changed
ggerganov May 11, 2023
cbb6a3a
llama : fix return for unknown version
ggerganov May 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ggml : fix WASM comments
  • Loading branch information
ggerganov committed May 11, 2023
commit 832c53f4274353ec6f16a88d1c0e830526a229fc
5 changes: 3 additions & 2 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -2425,7 +2425,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
const v128_t v0l = wasm_v128_and (v0, m4b);
const v128_t v0h = wasm_u8x16_shr(v0, 4);

// add high bit and sub 16
// add high bit and sub 16 (equivalent to sub 0x10 when bit is zero)
const v128_t v0lf = wasm_i8x16_sub(v0l, qhl);
const v128_t v0hf = wasm_i8x16_sub(v0h, qhh);

Expand Down Expand Up @@ -2570,7 +2570,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b));
const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4));

// add 5th bit
// add high bit
const int8x16_t v0_0lf = vorrq_s8(v0_0l, qhl0);
const int8x16_t v0_0hf = vorrq_s8(v0_0h, qhh0);
const int8x16_t v0_1lf = vorrq_s8(v0_1l, qhl1);
Expand Down Expand Up @@ -2622,6 +2622,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
uint32_t qh;
uint64_t tmp[4];

// TODO: check if unrolling this is better
for (int i = 0; i < nb; ++i) {
const block_q5_1 * restrict x0 = &x[i];
const block_q8_1 * restrict y0 = &y[i];
Expand Down