Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Previous commit
Next commit
fix xnnpack build
  • Loading branch information
fanchenkong1 committed Jul 28, 2025
commit 2813a81c278643a6e971dbec5a5124d2f2c5267a
4 changes: 2 additions & 2 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ protoc_linux_x86;https://github.com/protocolbuffers/protobuf/releases/download/v
protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-aarch_64.zip;df9d45470b0b8cf939dd2f0ec6b88e9cafc4d617
protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/google/pthreadpool/archive/4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0.zip;bd4ea65c8292801e9555b527a0ecbb2e0092c917
pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f780292da9db273c8ef06ccf5fd4b623624143e9
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6.zip;85bf8a60dae026b99b6ccd78606c85ed83bfb2cd
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/de0ce7c7251372892e53ce9bc891750d2c9a4fd8.zip;c45b8d3619b9bccbd26dc5f657959aee38b18b7a
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ if (onnxruntime_USE_XNNPACK)
ENDIF()
ADD_LIBRARY(xnnpack STATIC IMPORTED)
find_library(xnnpack_LIBRARY NAMES XNNPACK)
find_library(microkernels_prod_LIBRARY NAMES microkernels-prod)
find_library(microkernels_prod_LIBRARY NAMES xnnpack-microkernels-prod)
find_package(unofficial-pthreadpool CONFIG REQUIRED)

target_include_directories(xnnpack INTERFACE "${XNNPACK_HDR}")
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xnnpack.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ onnxruntime_fetchcontent_makeavailable(googlexnnpack)
set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR})
set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include)

set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK microkernels-prod pthreadpool)
set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK xnnpack-microkernels-prod pthreadpool)
if(ORT_TARGET_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC")
list(APPEND onnxruntime_EXTERNAL_LIBRARIES_XNNPACK kleidiai)
endif()
Expand Down
34 changes: 23 additions & 11 deletions cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0b3410ae..1e3cb8178 100644
index 94bcad92e3..be7dfe95fd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -337,7 +337,7 @@ ENDIF()
@@ -360,7 +360,7 @@ ENDIF()
# ---[ Build flags
IF(NOT CMAKE_SYSTEM_NAME)
MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
Expand All @@ -11,21 +11,30 @@ index f0b3410ae..1e3cb8178 100644
MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"")
ENDIF()
IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
@@ -848,7 +848,12 @@ IF(XNNPACK_BUILD_LIBRARY)
TARGET_LINK_LIBRARIES(operator-utils PRIVATE xnnpack-base logging)
TARGET_LINK_LIBRARIES(reference-ukernels PRIVATE xnnpack-base)
TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run datatype)
- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels)
@@ -903,10 +903,18 @@ IF(XNNPACK_BUILD_LIBRARY)
TARGET_LINK_LIBRARIES(xnnpack-operator-utils PRIVATE xnnpack-base xnnpack-logging)
TARGET_LINK_LIBRARIES(xnnpack-reference-ukernels PRIVATE xnnpack-base xnnpack-datatype)
TARGET_LINK_LIBRARIES(xnnpack-subgraph PRIVATE xnnpack-base xnnpack-allocator xnnpack-logging xnnpack-memory xnnpack-mutex xnnpack-operators xnnpack-operator-run xnnpack-datatype)
- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base xnnpack-allocator xnnpack-cache
- xnnpack-hardware-config xnnpack-indirection xnnpack-memory xnnpack-microkernel-utils xnnpack-microparams-init
- xnnpack-mutex xnnpack-normalization xnnpack-operators xnnpack-operator-run xnnpack-operator-utils xnnpack-pack-lh xnnpack-packing
- xnnpack-microkernels-prod xnnpack-subgraph xnnpack-datatype xnnpack-reference-ukernels)
+ IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph datatype reference-ukernels)
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base xnnpack-allocator xnnpack-cache
+ xnnpack-hardware-config xnnpack-indirection xnnpack-memory xnnpack-microkernel-utils xnnpack-microparams-init
+ xnnpack-mutex xnnpack-normalization xnnpack-operators xnnpack-operator-run xnnpack-operator-utils xnnpack-pack-lh xnnpack-packing
+ xnnpack-subgraph xnnpack-datatype xnnpack-reference-ukernels)
+ ELSE()
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels)
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base xnnpack-allocator xnnpack-cache
+ xnnpack-hardware-config xnnpack-indirection xnnpack-memory xnnpack-microkernel-utils xnnpack-microparams-init
+ xnnpack-mutex xnnpack-normalization xnnpack-operators xnnpack-operator-run xnnpack-operator-utils xnnpack-pack-lh xnnpack-packing
+ xnnpack-microkernels-prod xnnpack-subgraph xnnpack-datatype xnnpack-reference-ukernels)
+ ENDIF()
TARGET_LINK_LIBRARIES(XNNPACK PUBLIC pthreadpool logging)
TARGET_LINK_LIBRARIES(XNNPACK PUBLIC pthreadpool xnnpack-logging)
SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES)
ENDIF()
@@ -857,7 +862,8 @@ IF(NOT MSVC)
@@ -915,7 +923,8 @@ IF(NOT MSVC)
ENDIF()
IF(XNNPACK_TARGET_PROCESSOR STREQUAL "arm")
SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ")
Expand All @@ -35,3 +44,6 @@ index f0b3410ae..1e3cb8178 100644
SET_PROPERTY(SOURCE ${ALL_NEON_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon ")
SET_PROPERTY(SOURCE ${ALL_NEONFP16_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon-fp16 ")
# GCC requires -mfp16-format=ieee to define __fp16 type, but Clang doesn't support this option at all.
--
2.46.1

4 changes: 2 additions & 2 deletions cmake/vcpkg-ports/cpuinfo/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ endif()
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO pytorch/cpuinfo
REF 8a1772a0c5c447df2d18edf33ec4603a8c9c04a6
SHA512 b94ccbfa886221d6bb16513d074675af0a72928a9dd9485dcacdc1124a8a60aacbbe91913a1579e766dfb024f0be1d52eeead40342004ff0238a8b94a095ed08
REF de0ce7c7251372892e53ce9bc891750d2c9a4fd8
SHA512 0fde9210b700d2648d37c8deeb0d5c0d007d8ca5689578dd3bce4c460886b20d7649f0194d2ea06b02238fe9d4f06193599ec3ab5cafb19f1f860b00404264fa
HEAD_REF master
)

Expand Down
26 changes: 19 additions & 7 deletions cmake/vcpkg-ports/pthreadpool/fix-cmakelists.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
From 3621e82fb4b46cfe1960cf13fdea92e8760a305d Mon Sep 17 00:00:00 2001
From: Fanchen Kong <[email protected]>
Date: Wed, 2 Jul 2025 09:58:02 +0800
Subject: [PATCH] fix-cmakelists

---
CMakeLists.txt | 29 ++++++++++++-----------------
1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f06aada..3c6c6e2 100644
index efff8cc..1a0f7e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,8 +31,6 @@ IF(CCACHE_BINARY)
@@ -41,8 +41,6 @@ IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
ENDIF()

# ---[ Options.
Expand All @@ -11,7 +20,7 @@ index f06aada..3c6c6e2 100644
OPTION(PTHREADPOOL_ALLOW_DEPRECATED_API "Enable deprecated API functions" ON)
SET(PTHREADPOOL_SYNC_PRIMITIVE "default" CACHE STRING "Synchronization primitive (condvar, futex, gcd, event, or default) for worker threads")
SET_PROPERTY(CACHE PTHREADPOOL_SYNC_PRIMITIVE PROPERTY STRINGS default condvar futex gcd event)
@@ -41,7 +39,7 @@ IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$")
@@ -51,7 +49,7 @@ IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$")
ELSE()
OPTION(PTHREADPOOL_ENABLE_FASTPATH "Enable fast path using atomic decrement instead of atomic compare-and-swap" OFF)
ENDIF()
Expand All @@ -20,8 +29,8 @@ index f06aada..3c6c6e2 100644
OPTION(PTHREADPOOL_BUILD_TESTS "Build pthreadpool unit tests" ON)
OPTION(PTHREADPOOL_BUILD_BENCHMARKS "Build pthreadpool micro-benchmarks" ON)
ELSE()
@@ -67,7 +65,8 @@ MACRO(PTHREADPOOL_TARGET_ENABLE_CXX11 target)
ENDMACRO()
@@ -71,7 +69,8 @@ IF(PTHREADPOOL_BUILD_TESTS)
ENDIF()

# ---[ Download deps
-IF(NOT DEFINED FXDIV_SOURCE_DIR)
Expand All @@ -30,7 +39,7 @@ index f06aada..3c6c6e2 100644
MESSAGE(STATUS "Downloading FXdiv to ${CMAKE_BINARY_DIR}/FXdiv-source (define FXDIV_SOURCE_DIR to avoid it)")
CONFIGURE_FILE(cmake/DownloadFXdiv.cmake "${CMAKE_BINARY_DIR}/FXdiv-download/CMakeLists.txt")
EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
@@ -118,21 +117,13 @@ ELSE()
@@ -122,21 +121,13 @@ ELSE()
ENDIF()

ADD_LIBRARY(pthreadpool_interface INTERFACE)
Expand All @@ -54,7 +63,7 @@ index f06aada..3c6c6e2 100644

IF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "condvar")
TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=0)
@@ -181,18 +172,22 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
@@ -182,18 +173,22 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
ENDIF()

# ---[ Configure FXdiv
Expand All @@ -80,3 +89,6 @@ index f06aada..3c6c6e2 100644

IF(PTHREADPOOL_BUILD_TESTS)
# ---[ Build google test
--
2.46.1

4 changes: 2 additions & 2 deletions cmake/vcpkg-ports/pthreadpool/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ endif()
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO google/pthreadpool
REF 4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0
SHA512 776017cc5d2aa94337292f2f4fbd54d099ef29abf736ab8147f07f98f12b7654cbd2fe38d34646a479a519c261ac253bbaf19c6dcbb0ec4cc0859de70f7e6472
REF dcc9f28589066af0dbd4555579281230abbf74dd
SHA512 61853fa8f6c3297d8760be3af1df3f2a00583c1e0e58bdd03cd9cb915e8660a4f2817b22e6463cf53f10de902a1c6204ec6054fcbeada72eeee9e44baeb97178
PATCHES
fix-cmakelists.patch
)
Expand Down
40 changes: 24 additions & 16 deletions cmake/vcpkg-ports/xnnpack/fix-build.patch
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
From 715b9463289feef7d6bf736502a166bd00d70785 Mon Sep 17 00:00:00 2001
From: Fanchen Kong <[email protected]>
Date: Tue, 1 Jul 2025 13:42:35 +0800
Subject: [PATCH] fix-build

---
CMakeLists.txt | 29 ++++++++++-------------------
1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0b3410ae..ba54c3bfe 100644
index 9f6fb5e256..4387298e59 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1047,9 +1047,11 @@ ENDIF()
IF(XNNPACK_BUILD_ALL_MICROKERNELS)
TARGET_INCLUDE_DIRECTORIES(microkernels-all PRIVATE include src)
@@ -1125,7 +1125,7 @@ ELSE()
ENDIF()
+
TARGET_INCLUDE_DIRECTORIES(datatype PRIVATE include src)
TARGET_INCLUDE_DIRECTORIES(microkernels-prod PRIVATE include src)
-TARGET_INCLUDE_DIRECTORIES(hardware-config PRIVATE include src ${CPUINFO_SOURCE_DIR}/include)
+TARGET_INCLUDE_DIRECTORIES(hardware-config PRIVATE include src)
+
TARGET_INCLUDE_DIRECTORIES(indirection PRIVATE include src)
TARGET_INCLUDE_DIRECTORIES(microparams-init PRIVATE include src)
TARGET_INCLUDE_DIRECTORIES(normalization PRIVATE include src)
@@ -1104,14 +1106,9 @@ IF(NOT TARGET cpuinfo)

INCLUDE_DIRECTORIES(.)
-TARGET_INCLUDE_DIRECTORIES(xnnpack-hardware-config PRIVATE include src ${CPUINFO_SOURCE_DIR}/include)
+TARGET_INCLUDE_DIRECTORIES(xnnpack-hardware-config PRIVATE include src)
IF(XNNPACK_BUILD_LIBRARY)
TARGET_INCLUDE_DIRECTORIES(XNNPACK PUBLIC include)
IF(WIN32)
@@ -1164,14 +1164,9 @@ IF(NOT TARGET cpuinfo)
"${CPUINFO_SOURCE_DIR}"
"${CMAKE_BINARY_DIR}/cpuinfo")
ELSE()
Expand All @@ -33,7 +38,7 @@ index f0b3410ae..ba54c3bfe 100644
ENDIF()
ENDIF()
IF(XNNPACK_BUILD_LIBRARY)
@@ -1129,16 +1126,12 @@ IF(NOT TARGET pthreadpool)
@@ -1189,16 +1184,12 @@ IF(NOT TARGET pthreadpool)
"${PTHREADPOOL_SOURCE_DIR}"
"${CMAKE_BINARY_DIR}/pthreadpool")
ELSE()
Expand All @@ -53,7 +58,7 @@ index f0b3410ae..ba54c3bfe 100644
ENDIF()
ENDIF()
TARGET_LINK_LIBRARIES(xnnpack-base INTERFACE pthreadpool)
@@ -1152,12 +1145,12 @@ IF(NOT TARGET fxdiv)
@@ -1212,12 +1203,12 @@ IF(NOT TARGET fxdiv)
"${FXDIV_SOURCE_DIR}"
"${CMAKE_BINARY_DIR}/FXdiv")
ELSE()
Expand All @@ -69,3 +74,6 @@ index f0b3410ae..ba54c3bfe 100644
SET_PROPERTY(TARGET fxdiv PROPERTY LINKER_LANGUAGE C)
ENDIF()
ENDIF()
--
2.46.1

6 changes: 3 additions & 3 deletions onnxruntime/core/providers/xnnpack/math/gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ Status Gemm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr,

// flags - 1 - for no transpose - 0 for transpose
uint32_t flags = trans_B_ == CblasTrans ? 0 : XNN_FLAG_TRANSPOSE_WEIGHTS;
auto code_cache = GetCodeCache();
// auto code_cache = GetCodeCache();
auto weights_cache = GetWeightsCache();
xnn_status status = xnn_status::xnn_status_uninitialized;
struct xnn_operator* p = nullptr;
Expand All @@ -159,7 +159,7 @@ Status Gemm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr,
bias_data, // const float* bias,
foutput_min, foutput_max,
flags,
code_cache, weights_cache,
/*code_cache, */weights_cache,
&p);
} else if (op_compute_type_ == OpComputeType::op_compute_type_fp16) {
const MLFloat16* bias_data = nullptr;
Expand All @@ -175,7 +175,7 @@ Status Gemm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr,
bias_data, // const float* bias,
foutput_min, foutput_max,
flags,
code_cache, weights_cache,
/*code_cache, */weights_cache,
&p);
}

Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/core/providers/xnnpack/math/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
}

#ifdef XNN_CACHE_ENABLE
xnn_code_cache_t code_cache = GetCodeCache();
// xnn_code_cache_t code_cache = GetCodeCache();
xnn_weights_cache_t weight_cache = GetWeightsCache();
#else
xnn_code_cache_t code_cache = nullptr;
// xnn_code_cache_t code_cache = nullptr;
xnn_weights_cache_t weight_cache = nullptr;
#endif

Expand All @@ -122,7 +122,7 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
foutput_min,
foutput_max,
flags,
code_cache,
// code_cache,
weight_cache,
&p);
} else if (op_type_ == OpComputeType::op_compute_type_fp16) {
Expand All @@ -136,7 +136,7 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
foutput_min,
foutput_max,
flags,
code_cache,
// code_cache,
weight_cache,
&p);
}
Expand Down
Loading