Skip to content

Commit 906830b

Browse files
authored
fix: offload ffn norm weights (Tiiny-AI#179)
1 parent b478398 commit 906830b

File tree

1 file changed: +4 additions, −1 deletion

llama.cpp

Lines changed: 4 additions & 1 deletion
```diff
@@ -624,6 +624,7 @@ enum tensor_offloading_levels {
     TENSOR_OFFLOAD_FFN,
     TENSOR_OFFLOAD_ATTN,
     TENSOR_OFFLOAD_MLP_PRED,
+    TENSOR_OFFLOAD_FFN_IO,
     TENSOR_OFFLOAD_OUTPUT,
     TENSOR_OFFLOAD_KV_CACHE,
 };
@@ -641,8 +642,10 @@ tensor_offloading_levels get_offloading_level(llm_tensor tensor) {
     case LLM_TENSOR_ATTN_Q_NORM: case LLM_TENSOR_ATTN_K_NORM:
         return TENSOR_OFFLOAD_ATTN;
     case LLM_TENSOR_FFN_GATE: case LLM_TENSOR_FFN_DOWN: case LLM_TENSOR_FFN_UP:
-    case LLM_TENSOR_FFN_NORM: case LLM_TENSOR_FFN_DOWN_T:
+    case LLM_TENSOR_FFN_DOWN_T:
         return TENSOR_OFFLOAD_FFN;
+    case LLM_TENSOR_FFN_NORM:
+        return TENSOR_OFFLOAD_FFN_IO;
     case LLM_TENSOR_MLP_PRED_FC1: case LLM_TENSOR_MLP_PRED_FC2:
         return TENSOR_OFFLOAD_MLP_PRED;
     default:
```

0 commit comments

Comments (0)