Skip to content

Commit aaa5c37

Browse files
authored
Merge pull request #5465 from ChipKerchner/addRVVVectorizedFP16Packing
Add vectorized packing for FP16 and BF16 for RISC-V. Reactivate vector packing for FP64 transposed
2 parents c1f607c + 07d0e74 commit aaa5c37

File tree

6 files changed

+844
-0
lines changed

6 files changed

+844
-0
lines changed

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,13 +120,21 @@ endif
120120

121121
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl128b.c
122122
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
123+
ifneq ($(filter $(DGEMM_UNROLL_N),4 8 16),)
124+
DGEMMOTCOPY = gemm_tcopy_$(DGEMM_UNROLL_N)_rvv.c
125+
else
123126
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
127+
endif
124128
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
125129
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
126130

127131
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
128132
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
133+
ifneq ($(filter $(DGEMM_UNROLL_M),4 8 16),)
134+
DGEMMITCOPY = gemm_tcopy_$(DGEMM_UNROLL_M)_rvv.c
135+
else
129136
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
137+
endif
130138
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
131139
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
132140
endif
@@ -258,8 +266,13 @@ endif
258266

259267
ifeq ($(BUILD_HFLOAT16), 1)
260268
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl128b.c
269+
ifneq ($(filter $(SHGEMM_UNROLL_N),8 16),)
270+
SHGEMMONCOPY = gemm_ncopy_$(SHGEMM_UNROLL_N)fp_rvv.c
271+
SHGEMMOTCOPY = gemm_tcopy_$(SHGEMM_UNROLL_N)fp_rvv.c
272+
else
261273
SHGEMMONCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_N).c
262274
SHGEMMOTCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_N).c
275+
endif
263276
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
264277
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
265278
ifndef SHGEMM_BETA
@@ -269,8 +282,13 @@ endif
269282

270283
ifeq ($(BUILD_BFLOAT16), 1)
271284
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_zvl128b.c
285+
ifneq ($(filter $(SBGEMM_UNROLL_N),8 16),)
286+
SBGEMMONCOPY = gemm_ncopy_$(SBGEMM_UNROLL_N)fp_rvv.c
287+
SBGEMMOTCOPY = gemm_tcopy_$(SBGEMM_UNROLL_N)fp_rvv.c
288+
else
272289
SBGEMMONCOPY = ../generic/gemm_ncopy_$(SBGEMM_UNROLL_N).c
273290
SBGEMMOTCOPY = ../generic/gemm_tcopy_$(SBGEMM_UNROLL_N).c
291+
endif
274292
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
275293
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
276294
ifndef SBGEMM_BETA

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,12 +119,20 @@ endif
119119

120120
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl256b.c
121121
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
122+
ifneq ($(filter $(DGEMM_UNROLL_N),4 8 16),)
123+
DGEMMOTCOPY = gemm_tcopy_$(DGEMM_UNROLL_N)_rvv.c
124+
else
122125
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
126+
endif
123127
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
124128
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
125129
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
126130
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
131+
ifneq ($(filter $(DGEMM_UNROLL_M),4 8 16),)
132+
DGEMMITCOPY = gemm_tcopy_$(DGEMM_UNROLL_M)_rvv.c
133+
else
127134
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
135+
endif
128136
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
129137
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
130138
endif
@@ -226,13 +234,23 @@ SOMATCOPY_CT = omatcopy_ct_rvv.c
226234
ifeq ($(BUILD_HFLOAT16), 1)
227235
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c
228236
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
237+
ifneq ($(filter $(SHGEMM_UNROLL_M),8 16),)
238+
SHGEMMINCOPY = gemm_ncopy_$(SHGEMM_UNROLL_M)fp_rvv.c
239+
SHGEMMITCOPY = gemm_tcopy_$(SHGEMM_UNROLL_M)fp_rvv.c
240+
else
229241
SHGEMMINCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_M).c
230242
SHGEMMITCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_M).c
243+
endif
231244
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
232245
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
233246
endif
247+
ifneq ($(filter $(SHGEMM_UNROLL_N),8 16),)
248+
SHGEMMONCOPY = gemm_ncopy_$(SHGEMM_UNROLL_N)fp_rvv.c
249+
SHGEMMOTCOPY = gemm_tcopy_$(SHGEMM_UNROLL_N)fp_rvv.c
250+
else
234251
SHGEMMONCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_N).c
235252
SHGEMMOTCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_N).c
253+
endif
236254
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
237255
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
238256
ifndef SHGEMM_BETA
@@ -243,13 +261,23 @@ endif
243261
ifeq ($(BUILD_BFLOAT16), 1)
244262
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_zvl256b.c
245263
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
264+
ifneq ($(filter $(SBGEMM_UNROLL_M),8 16),)
265+
SBGEMMINCOPY = gemm_ncopy_$(SBGEMM_UNROLL_M)fp_rvv.c
266+
SBGEMMITCOPY = gemm_tcopy_$(SBGEMM_UNROLL_M)fp_rvv.c
267+
else
246268
SBGEMMINCOPY = ../generic/gemm_ncopy_$(SBGEMM_UNROLL_M).c
247269
SBGEMMITCOPY = ../generic/gemm_tcopy_$(SBGEMM_UNROLL_M).c
270+
endif
248271
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
249272
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
250273
endif
274+
ifneq ($(filter $(SBGEMM_UNROLL_N),8 16),)
275+
SBGEMMONCOPY = gemm_ncopy_$(SBGEMM_UNROLL_N)fp_rvv.c
276+
SBGEMMOTCOPY = gemm_tcopy_$(SBGEMM_UNROLL_N)fp_rvv.c
277+
else
251278
SBGEMMONCOPY = ../generic/gemm_ncopy_$(SBGEMM_UNROLL_N).c
252279
SBGEMMOTCOPY = ../generic/gemm_tcopy_$(SBGEMM_UNROLL_N).c
280+
endif
253281
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
254282
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
255283
ifndef SBGEMM_BETA

0 commit comments

Comments (0)