From b8b5ee4934c861fa26286e955c288a99ce3c2027 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Fri, 22 Dec 2023 15:51:42 -0800 Subject: [PATCH 1/2] Add inline keyword to avx512_qsort _Float16 --- src/avx512fp16-16bit-qsort.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/avx512fp16-16bit-qsort.hpp b/src/avx512fp16-16bit-qsort.hpp index 8a9a49ed..1206f822 100644 --- a/src/avx512fp16-16bit-qsort.hpp +++ b/src/avx512fp16-16bit-qsort.hpp @@ -145,7 +145,7 @@ replace_inf_with_nan(_Float16 *arr, int64_t arrsize, int64_t nan_count) } template <> -void avx512_qselect(_Float16 *arr, int64_t k, int64_t arrsize) +inline void avx512_qselect(_Float16 *arr, int64_t k, int64_t arrsize) { if (arrsize > 1) { int64_t nan_count = replace_nan_with_inf(arr, arrsize); @@ -156,7 +156,7 @@ void avx512_qselect(_Float16 *arr, int64_t k, int64_t arrsize) } template <> -void avx512_qsort(_Float16 *arr, int64_t arrsize) +inline void avx512_qsort(_Float16 *arr, int64_t arrsize) { if (arrsize > 1) { int64_t nan_count = replace_nan_with_inf(arr, arrsize); From c20fb421455db62e764897f07c421b8b5b39fa7d Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Fri, 22 Dec 2023 15:59:04 -0800 Subject: [PATCH 2/2] More inline --- src/avx512-16bit-qsort.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/avx512-16bit-qsort.hpp b/src/avx512-16bit-qsort.hpp index 606f8706..2eb44542 100644 --- a/src/avx512-16bit-qsort.hpp +++ b/src/avx512-16bit-qsort.hpp @@ -350,7 +350,7 @@ struct zmm_vector { }; template <> -bool comparison_func>(const uint16_t &a, const uint16_t &b) +inline bool comparison_func>(const uint16_t &a, const uint16_t &b) { uint16_t signa = a & 0x8000, signb = b & 0x8000; uint16_t expa = a & 0x7c00, expb = b & 0x7c00; @@ -406,7 +406,7 @@ replace_inf_with_nan(uint16_t *arr, int64_t arrsize, int64_t nan_count) } template <> -void avx512_qselect(int16_t *arr, int64_t k, int64_t arrsize) +inline void 
avx512_qselect(int16_t *arr, int64_t k, int64_t arrsize) { if (arrsize > 1) { qselect_16bit_, int16_t>( @@ -415,7 +415,7 @@ void avx512_qselect(int16_t *arr, int64_t k, int64_t arrsize) } template <> -void avx512_qselect(uint16_t *arr, int64_t k, int64_t arrsize) +inline void avx512_qselect(uint16_t *arr, int64_t k, int64_t arrsize) { if (arrsize > 1) { qselect_16bit_, uint16_t>( @@ -423,7 +423,7 @@ void avx512_qselect(uint16_t *arr, int64_t k, int64_t arrsize) } } -void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize) +inline void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize) { if (arrsize > 1) { int64_t nan_count = replace_nan_with_inf(arr, arrsize); @@ -434,7 +434,7 @@ void avx512_qselect_fp16(uint16_t *arr, int64_t k, int64_t arrsize) } template <> -void avx512_qsort(int16_t *arr, int64_t arrsize) +inline void avx512_qsort(int16_t *arr, int64_t arrsize) { if (arrsize > 1) { qsort_16bit_, int16_t>( @@ -443,7 +443,7 @@ void avx512_qsort(int16_t *arr, int64_t arrsize) } template <> -void avx512_qsort(uint16_t *arr, int64_t arrsize) +inline void avx512_qsort(uint16_t *arr, int64_t arrsize) { if (arrsize > 1) { qsort_16bit_, uint16_t>( @@ -451,7 +451,7 @@ void avx512_qsort(uint16_t *arr, int64_t arrsize) } } -void avx512_qsort_fp16(uint16_t *arr, int64_t arrsize) +inline void avx512_qsort_fp16(uint16_t *arr, int64_t arrsize) { if (arrsize > 1) { int64_t nan_count = replace_nan_with_inf(arr, arrsize);