Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c7e3f63
[QNN EP] Add Unit tests for LPBQ Fusions (#25592)
quic-tirupath Jul 31, 2025
2bd947a
[QNN-EP] Resolve VTCM buffer sharing bugs (#25622)
quic-calvnguy Aug 1, 2025
eb95592
Update QAIRT to 2.37.0 (#25688)
qti-kromero Aug 8, 2025
86c1eae
[QNN EP] Disable tests broken by QNN 2.37 (#25729)
qti-jkilpatrick Aug 12, 2025
c070271
Fix cleanup of Environment class. (#25743)
skottmckay Aug 15, 2025
c6a60ec
GatherBlockQuantized shape inference test (#25769)
jiafatom Aug 18, 2025
69dc281
[QNN-EP] Fix int64 graph output issue (#25745)
kuanyul-qti Aug 18, 2025
e7be54b
Expose GetOrtvalueInitializer via provider bridge (#25761)
yuslepukhin Aug 18, 2025
d8effa6
[QNN EP] Upgrade QNN to 2.37.1 (#25751)
qti-jkilpatrick Aug 18, 2025
2cca5c9
Fix bug for same option provided multiple times in perf test (#25716)
chilo-ms Aug 19, 2025
392627e
Add some device discovery support for non-Windows platforms (#25228)
edgchen1 Aug 19, 2025
9cac54b
[EP ABI] support `Graph_GetModelMetadata` (#25768)
wcy123 Aug 21, 2025
381d19f
[QNN EP] Fix Pool builder assert in Debug build. (#25788)
minfhong-qti Aug 20, 2025
5731750
[TRT RTX EP] EP context changes (#25747)
thevishalagarwal Aug 21, 2025
106cbcb
[NV TRT RTX EP] Reconfigure memory arena to grow with power of 2 (#25…
gedoensmax Aug 22, 2025
d156d0d
Add patch file for cpuinfo's vcpkg port (#25818)
snnn Aug 22, 2025
8c2266e
Introduce new C++ API for C interfaces (#25762)
yuslepukhin Aug 22, 2025
3cb441f
Add support for generating and validating compiled model compatibilit…
adrastogi Aug 22, 2025
dbf253e
Disable cpuinfo for ARM64EC builds. (#25831)
edgchen1 Aug 22, 2025
9190140
Update mac.yml iphone_simulator job - use Xcode 16.4 and simulator ru…
edgchen1 Aug 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,13 @@ if (onnxruntime_ENABLE_CPUINFO)
set(CPUINFO_SUPPORTED TRUE)
endif()
if (WIN32)
set(CPUINFO_SUPPORTED TRUE)
# There's an error when linking with cpuinfo on arm64ec with a vcpkg build (--use_vcpkg).
# TODO Fix it and then re-enable cpuinfo on arm64ec.
if (onnxruntime_target_platform STREQUAL "ARM64EC")
set(CPUINFO_SUPPORTED FALSE)
else()
set(CPUINFO_SUPPORTED TRUE)
endif()
elseif (NOT ${onnxruntime_target_platform} MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
message(WARNING
"Target processor architecture \"${onnxruntime_target_platform}\" is not supported in cpuinfo. "
Expand Down
41 changes: 33 additions & 8 deletions cmake/onnxruntime_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ set(onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/check_intel.h"
"${ONNXRUNTIME_ROOT}/core/platform/check_intel.cc"
"${ONNXRUNTIME_ROOT}/core/platform/device_discovery.h"
"${ONNXRUNTIME_ROOT}/core/platform/device_discovery.cc"
"${ONNXRUNTIME_ROOT}/core/platform/device_discovery_common.cc"
"${ONNXRUNTIME_ROOT}/core/platform/env.h"
"${ONNXRUNTIME_ROOT}/core/platform/env.cc"
"${ONNXRUNTIME_ROOT}/core/platform/env_time.h"
Expand All @@ -32,26 +32,38 @@ set(onnxruntime_common_src_patterns

if(WIN32)
list(APPEND onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/windows/*.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/*.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/env_time.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/env.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/env.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/stacktrace.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.h"
"${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.cc"
)

else()
list(APPEND onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/posix/*.h"
"${ONNXRUNTIME_ROOT}/core/platform/posix/*.cc"
"${ONNXRUNTIME_ROOT}/core/platform/posix/env_time.cc"
"${ONNXRUNTIME_ROOT}/core/platform/posix/env.cc"
"${ONNXRUNTIME_ROOT}/core/platform/posix/stacktrace.cc"
)

# logging files
if (onnxruntime_USE_SYSLOG)
list(APPEND onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/posix/logging/*.h"
"${ONNXRUNTIME_ROOT}/core/platform/posix/logging/*.cc"
)
endif()

if (CMAKE_SYSTEM_NAME STREQUAL "Android")
if (ANDROID)
list(APPEND onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.h"
"${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.cc"
Expand All @@ -66,6 +78,21 @@ else()
endif()
endif()

# Platform-specific device discovery source.
# Exactly one implementation file is selected per target platform; platforms other than
# Windows, Linux and Apple fall back to device_discovery_default.cc.
if (WIN32)
  set(onnxruntime_device_discovery_src "${ONNXRUNTIME_ROOT}/core/platform/windows/device_discovery.cc")
elseif (LINUX)
  set(onnxruntime_device_discovery_src "${ONNXRUNTIME_ROOT}/core/platform/linux/device_discovery.cc")
elseif (APPLE)
  set(onnxruntime_device_discovery_src "${ONNXRUNTIME_ROOT}/core/platform/apple/device_discovery.cc")
else()
  set(onnxruntime_device_discovery_src "${ONNXRUNTIME_ROOT}/core/platform/device_discovery_default.cc")
endif()

# Append the chosen file once, then drop the temporary so it does not leak into later logic.
list(APPEND onnxruntime_common_src_patterns "${onnxruntime_device_discovery_src}")
unset(onnxruntime_device_discovery_src)

if(onnxruntime_target_platform STREQUAL "ARM64EC")
if (MSVC)
link_directories("$ENV{VCINSTALLDIR}/Tools/MSVC/$ENV{VCToolsVersion}/lib/ARM64EC")
Expand Down Expand Up @@ -216,8 +243,6 @@ endif()

if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64)
# Link cpuinfo if supported
# Using it mainly in ARM with Android.
# Its functionality in detecting x86 cpu features are lacking, so is support for Windows.
if (CPUINFO_SUPPORTED)
onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME})
Expand Down
22 changes: 22 additions & 0 deletions cmake/vcpkg-ports/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
diff --git a/include/cpuinfo.h b/include/cpuinfo.h
index f1d35d4..9e454d2 100644
--- a/include/cpuinfo.h
+++ b/include/cpuinfo.h
@@ -18,7 +18,7 @@
#define CPUINFO_ARCH_X86 1
#endif

-#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#if defined(__x86_64__) || defined(__x86_64) || (defined(_M_X64) && !defined(_M_ARM64EC)) || (defined(_M_AMD64) && !defined(_M_ARM64EC))
#define CPUINFO_ARCH_X86_64 1
#endif

@@ -26,7 +26,7 @@
#define CPUINFO_ARCH_ARM 1
#endif

-#if defined(__aarch64__) || defined(_M_ARM64)
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define CPUINFO_ARCH_ARM64 1
#endif

2 changes: 2 additions & 0 deletions cmake/vcpkg-ports/cpuinfo/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ vcpkg_from_github(
REF 8a1772a0c5c447df2d18edf33ec4603a8c9c04a6
SHA512 b94ccbfa886221d6bb16513d074675af0a72928a9dd9485dcacdc1124a8a60aacbbe91913a1579e766dfb024f0be1d52eeead40342004ff0238a8b94a095ed08
HEAD_REF master
PATCHES
patch_cpuinfo_h_for_arm64ec.patch
)

vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
Expand Down
23 changes: 20 additions & 3 deletions include/onnxruntime/core/common/parse_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,30 @@ template <typename T>
std::enable_if_t<detail::ParseWithFromChars<T>, bool>
TryParseStringWithClassicLocale(std::string_view str, T& value) {
T parsed_value{};
const auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(), parsed_value);

if (ec != std::errc{}) {
std::from_chars_result conversion_result{};
if constexpr (std::is_integral_v<T> && std::is_unsigned_v<T>) {
// For unsigned integral types, also handle hex values, i.e., those beginning with "0x".
// std::from_chars() does not accept the "0x" prefix.
const bool has_hex_prefix = str.size() >= 2 &&
str[0] == '0' &&
(str[1] == 'x' || str[1] == 'X');

if (has_hex_prefix) {
str = str.substr(2);
}

const int base = has_hex_prefix ? 16 : 10;
conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value, base);
} else {
conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value);
}

if (conversion_result.ec != std::errc{}) {
return false;
}

if (ptr != str.data() + str.size()) {
if (conversion_result.ptr != str.data() + str.size()) {
return false;
}

Expand Down
24 changes: 24 additions & 0 deletions include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class GraphOptimizerRegistry;
#include "core/framework/framework_provider_common.h"
#include "core/framework/stream_handles.h"
#include "core/framework/tuning_context.h"
#include "core/session/onnxruntime_c_api.h"

struct OrtEpDevice;
struct OrtRunOptions;
Expand Down Expand Up @@ -322,6 +323,29 @@ class IExecutionProvider {
virtual common::Status Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
std::vector<NodeComputeInfo>& node_compute_funcs);

/**
 * Get the compatibility info for a compiled model.
 *
 * The execution provider determines this value, which denotes the compatibility of the compiled model with the EP.
 * This is stored in the model metadata under a key associated with the EP type.
 *
 * The default implementation ignores `graph_viewer` and returns an empty string.
 */
virtual std::string GetCompiledModelCompatibilityInfo(const onnxruntime::GraphViewer& graph_viewer) const {
  // The base implementation has nothing to report, so the graph is intentionally unused.
  ORT_UNUSED_PARAMETER(graph_viewer);
  return {};
}

/**
 * Validate the compatibility of a compiled model with this execution provider.
 *
 * The default implementation does not inspect the compatibility info string. It sets `model_compatibility` to
 * OrtCompiledModelCompatibility_EP_NOT_APPLICABLE and returns an OK status.
 */
virtual common::Status ValidateCompiledModelCompatibilityInfo(const std::string& /*compatibility_info*/,
                                                              OrtCompiledModelCompatibility& model_compatibility) const {
  // An EP that does not override this method reports that compatibility validation is not applicable to it.
  model_compatibility = OrtCompiledModelCompatibility_EP_NOT_APPLICABLE;
  return common::Status::OK();
}

#endif

void SetLogger(const logging::Logger* logger) {
Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/graph/indexed_sub_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ struct IndexedSubGraph {
std::string domain; ///< Domain of customized SubGraph/FunctionProto
int since_version; ///< Since version of customized SubGraph/FunctionProto.

ONNX_NAMESPACE::OperatorStatus status; ///< Status of customized SubGraph/FunctionProto.
ONNX_NAMESPACE::OperatorStatus status{ONNX_NAMESPACE::OperatorStatus::STABLE}; ///< Status of customized SubGraph/FunctionProto.

std::vector<std::string> inputs; ///< Inputs of customized SubGraph/FunctionProto.
std::vector<std::string> outputs; ///< Outputs of customized SubGraph/FunctionProto.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ constexpr const char* kProfilesMinShapes = "nv_profile_min_shapes";
constexpr const char* kProfilesMaxShapes = "nv_profile_max_shapes";
constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
constexpr const char* kCudaGraphEnable = "nv_cuda_graph_enable";
constexpr const char* kONNXBytestream = "nv_onnx_bytestream";
constexpr const char* kONNXBytestreamSize = "nv_onnx_bytestream_size";
constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";

} // namespace provider_option_names
namespace run_option_names {
Expand Down
17 changes: 0 additions & 17 deletions include/onnxruntime/core/session/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,23 +199,6 @@ class Environment {

using OrtAllocatorUniquePtr = std::unique_ptr<OrtAllocator, std::function<void(OrtAllocator*)>>;

// if the user calls CreateSharedAllocator and wraps the plugin EP's allocator with an arena we end up with
// OrtAllocator from EP -> wrapped in IAllocatorImplWrappingOrtAllocator -> inside a BFCArena IAllocator.
// we can put that in shared_allocators_ for sessions to use, but to have an OrtAllocator available in
// shared_ort_allocators_ that can be used outside of a session we need to additionally wrap that in an
// OrtAllocatorImplWrappingIAllocator. way too many levels of indirection but that is what it is currently.
// we need something to own that final OrtAllocator, so we add it to arena_ort_allocators_.
//
// TODO: we could split out the BFCArena implementation so it can be plugged into either an IAllocator
// or an OrtAllocator instance to reduce the indirection a little.
// with that we get an OrtAllocator from the EP, wrap it with an OrtAllocator based BFCArena, and wrap that with the
// IAllocatorImplWrappingOrtAllocator which takes ownership of the OrtAllocator and is in shared_allocators_.
//
// Alternatively we can disable wrapping an EP's allocator with a BFCArena and say the EP should provide the arena
// implementation directly. They're free to copy BFCArena as it came from TF originally. Or we could provide a
// cut-and-paste BFCArena implementation that works using the EP API that can be included in the EP source.
std::unordered_map<const OrtMemoryInfo*, std::unique_ptr<OrtAllocatorImplWrappingIAllocator>> arena_ort_allocators_;

#if !defined(ORT_MINIMAL_BUILD)
// register EPs that are built into the ORT binary so they can take part in AutoEP selection
// added to ep_libraries
Expand Down
13 changes: 12 additions & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -5829,7 +5829,7 @@ struct OrtApi {
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Graph_GetNodes, const OrtGraph* graph,
ORT_API2_STATUS(Graph_GetNodes, _In_ const OrtGraph* graph,
_Out_writes_(num_nodes) const OrtNode** nodes, _In_ size_t num_nodes);

/** \brief Get the parent node for the given graph, if any exists.
Expand Down Expand Up @@ -6469,6 +6469,17 @@ struct OrtApi {
_In_reads_(num_tensors) OrtValue* const* dst_tensors,
_In_opt_ OrtSyncStream* stream,
_In_ size_t num_tensors);

/** \brief Get the ::OrtModelMetadata associated with an ::OrtGraph.
 *
 * \param[in] graph The OrtGraph instance.
 * \param[out] out Receives a newly created ::OrtModelMetadata instance.
 *                 Must be freed using OrtApi::ReleaseModelMetadata.
 *
 * \snippet{doc} snippets.dox OrtStatus Return Value
 *
 * \since Version 1.23.
 */
ORT_API2_STATUS(Graph_GetModelMetadata, _In_ const OrtGraph* graph, _Outptr_ OrtModelMetadata** out);
};

/*
Expand Down
Loading
Loading