File tree Expand file tree Collapse file tree 1 file changed +4
-1
lines changed
Expand file tree Collapse file tree 1 file changed +4
-1
lines changed Original file line number Diff line number Diff line change @@ -624,6 +624,7 @@ enum tensor_offloading_levels {
624624 TENSOR_OFFLOAD_FFN,
625625 TENSOR_OFFLOAD_ATTN,
626626 TENSOR_OFFLOAD_MLP_PRED,
627+ TENSOR_OFFLOAD_FFN_IO,
627628 TENSOR_OFFLOAD_OUTPUT,
628629 TENSOR_OFFLOAD_KV_CACHE,
629630};
@@ -641,8 +642,10 @@ tensor_offloading_levels get_offloading_level(llm_tensor tensor) {
641642 case LLM_TENSOR_ATTN_Q_NORM: case LLM_TENSOR_ATTN_K_NORM:
642643 return TENSOR_OFFLOAD_ATTN;
643644 case LLM_TENSOR_FFN_GATE: case LLM_TENSOR_FFN_DOWN: case LLM_TENSOR_FFN_UP:
644- case LLM_TENSOR_FFN_NORM: case LLM_TENSOR_FFN_DOWN_T:
645+ case LLM_TENSOR_FFN_DOWN_T:
645646 return TENSOR_OFFLOAD_FFN;
647+ case LLM_TENSOR_FFN_NORM:
648+ return TENSOR_OFFLOAD_FFN_IO;
646649 case LLM_TENSOR_MLP_PRED_FC1: case LLM_TENSOR_MLP_PRED_FC2:
647650 return TENSOR_OFFLOAD_MLP_PRED;
648651 default :
You can’t perform that action at this time.
0 commit comments