Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
5b8550c
[build] disable CodeQL for NPM Packaging Pipeline (#25614)
fs-eire Aug 1, 2025
d94f19c
Refactor Java Test Pipeline (#25608)
snnn Jul 31, 2025
2769b01
[build] upgrade Node.js for NPM packaging pipeline (#25568)
fs-eire Jul 29, 2025
de8c7fe
[webgpu] Apply Flash Attention if sliding window exceeds KV cache len…
daijh Aug 1, 2025
879b798
Relax WeightBiasQuantization constraint for larger QDQ node group (#2…
qti-yuduo Aug 14, 2025
9ba9258
Add cuda graph implementation for NV TRT RTX EP (#25787)
umangb-09 Aug 27, 2025
4d8d79e
python GPU IO Bindings for NVIDIA (#25776)
ishwar-raut1 Aug 28, 2025
118ebc4
Fixes for DynamicQuantizeMatMul and Attention3D tests (#25814)
JonathanC-ARM Aug 28, 2025
bc58268
Fix a long standing bug on file memory mapping on windows. (#25833)
yuslepukhin Aug 27, 2025
3a05461
Add API for precompiled model compatibility check using just the comp…
adrastogi Aug 27, 2025
34b3c54
Enable ABSL_FLAGS flag registration for onnxruntime_perf_test for mob…
chilo-ms Aug 26, 2025
19659e6
Add default constructor to Ort::Status. (#25860)
yuslepukhin Aug 27, 2025
f3464aa
update
snnn Aug 29, 2025
1a743ae
Merge branch 'users/snnn/rel-1.23.0' of https://github.com/microsoft/…
snnn Aug 29, 2025
32ea3f5
[CPU] Optimize GQA attention bias application for FP16 (#25871)
derdeljan-msft Aug 28, 2025
6615173
Language bindings for model compatibility API (#25878)
adrastogi Aug 29, 2025
e5d790e
[OVEP] OpenVINO EP Features and bug-fixes for ORT-1.23 (#25884)
preetha-intel Aug 29, 2025
0e74bb9
[EP ABI] Add OpAttr_GetTensorAttributeAsOrtValue and replace the exis…
chilo-ms Aug 29, 2025
6a9ddb6
Add error handling to extract_nuget_files.ps1 (#25866)
snnn Aug 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,12 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
${onnxruntime_perf_test_src_patterns}
)
onnxruntime_add_executable(onnxruntime_perf_test ${onnxruntime_perf_test_src} ${ONNXRUNTIME_ROOT}/core/platform/path_lib.cc)

# ABSL_FLAGS_STRIP_NAMES is set to 1 by default to disable flag registration when building for Android, iPhone, and "embedded devices".
# See the issue: https://github.com/abseil/abseil-cpp/issues/1875
# We set it to 0 for all builds to be able to use ABSL flags for onnxruntime_perf_test.
target_compile_definitions(onnxruntime_perf_test PRIVATE ABSL_FLAGS_STRIP_NAMES=0)

if(MSVC)
target_compile_options(onnxruntime_perf_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
Expand Down
98 changes: 98 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,88 @@ public struct OrtApi
public IntPtr EpDevice_Device;
public IntPtr GetEpApi;
public IntPtr GetTensorSizeInBytes;

public IntPtr AllocatorGetStats;

public IntPtr CreateMemoryInfo_V2;
public IntPtr MemoryInfoGetDeviceMemType;
public IntPtr MemoryInfoGetVendorId;

public IntPtr ValueInfo_GetValueProducer;
public IntPtr ValueInfo_GetValueNumConsumers;
public IntPtr ValueInfo_GetValueConsumers;
public IntPtr ValueInfo_GetInitializerValue;
public IntPtr ValueInfo_GetExternalInitializerInfo;
public IntPtr ValueInfo_IsRequiredGraphInput;
public IntPtr ValueInfo_IsOptionalGraphInput;
public IntPtr ValueInfo_IsGraphOutput;
public IntPtr ValueInfo_IsConstantInitializer;
public IntPtr ValueInfo_IsFromOuterScope;
public IntPtr Graph_GetName;
public IntPtr Graph_GetModelPath;
public IntPtr Graph_GetOnnxIRVersion;
public IntPtr Graph_GetNumOperatorSets;
public IntPtr Graph_GetOperatorSets;
public IntPtr Graph_GetNumInputs;
public IntPtr Graph_GetInputs;
public IntPtr Graph_GetNumOutputs;
public IntPtr Graph_GetOutputs;
public IntPtr Graph_GetNumInitializers;
public IntPtr Graph_GetInitializers;
public IntPtr Graph_GetNumNodes;
public IntPtr Graph_GetNodes;
public IntPtr Graph_GetParentNode;
public IntPtr Graph_GetGraphView;
public IntPtr Node_GetId;
public IntPtr Node_GetName;
public IntPtr Node_GetOperatorType;
public IntPtr Node_GetDomain;
public IntPtr Node_GetSinceVersion;
public IntPtr Node_GetNumInputs;
public IntPtr Node_GetInputs;
public IntPtr Node_GetNumOutputs;
public IntPtr Node_GetOutputs;
public IntPtr Node_GetNumImplicitInputs;
public IntPtr Node_GetImplicitInputs;
public IntPtr Node_GetNumAttributes;
public IntPtr Node_GetAttributes;
public IntPtr Node_GetAttributeByName;
public IntPtr Node_GetTensorAttributeAsOrtValue;
public IntPtr OpAttr_GetType;
public IntPtr OpAttr_GetName;
public IntPtr Node_GetNumSubgraphs;
public IntPtr Node_GetSubgraphs;
public IntPtr Node_GetGraph;
public IntPtr Node_GetEpName;
public IntPtr ReleaseExternalInitializerInfo;
public IntPtr ExternalInitializerInfo_GetFilePath;
public IntPtr ExternalInitializerInfo_GetFileOffset;
public IntPtr ExternalInitializerInfo_GetByteSize;

public IntPtr GetRunConfigEntry;

public IntPtr EpDevice_MemoryInfo;

public IntPtr CreateSharedAllocator;
public IntPtr GetSharedAllocator;
public IntPtr ReleaseSharedAllocator;

public IntPtr GetTensorData;

public IntPtr GetSessionOptionsConfigEntries;

public IntPtr SessionGetMemoryInfoForInputs;
public IntPtr SessionGetMemoryInfoForOutputs;
public IntPtr SessionGetEpDeviceForInputs;

public IntPtr CreateSyncStreamForEpDevice;
public IntPtr SyncStream_GetHandle;
public IntPtr ReleaseSyncStream;

public IntPtr CopyTensors;

public IntPtr Graph_GetModelMetadata;
public IntPtr GetModelCompatibilityForEpDevices;
}

internal static class NativeMethods
Expand Down Expand Up @@ -704,6 +786,10 @@ static NativeMethods()
(DSessionOptionsSetEpSelectionPolicyDelegate)Marshal.GetDelegateForFunctionPointer(
api_.SessionOptionsSetEpSelectionPolicyDelegate,
typeof(DSessionOptionsSetEpSelectionPolicyDelegate));

OrtGetModelCompatibilityForEpDevices = (DOrtGetModelCompatibilityForEpDevices)Marshal.GetDelegateForFunctionPointer(
api_.GetModelCompatibilityForEpDevices,
typeof(DOrtGetModelCompatibilityForEpDevices));
}

internal class NativeLib
Expand Down Expand Up @@ -2456,6 +2542,18 @@ public delegate void DOrtRemoveKeyValuePair(IntPtr /* OrtKeyValuePairs* */ kvps,

public static DOrtGetEpDevices OrtGetEpDevices;

/// <summary>
/// Validate compiled model compatibility for the provided EP devices.
/// Managed signature for the native OrtApi::GetModelCompatibilityForEpDevices entry.
/// </summary>
/// <param name="ep_devices">Native OrtEpDevice handles to validate against.
/// The native API documents that all devices must belong to the same execution provider.</param>
/// <param name="num_ep_devices">Number of entries in <paramref name="ep_devices"/>.</param>
/// <param name="compatibility_info">Compatibility info string produced when the model was compiled;
/// the managed wrapper passes it as zero-terminated UTF-8 bytes.</param>
/// <param name="out_status">Receives the OrtCompiledModelCompatibility value as a raw int.</param>
/// <returns>OrtStatus* which the managed wrapper checks with NativeApiStatus.VerifySuccess.</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtGetModelCompatibilityForEpDevices(
IntPtr[] /* const OrtEpDevice* const* */ ep_devices,
UIntPtr /* size_t */ num_ep_devices,
byte[] /* const char* */ compatibility_info,
out int /* OrtCompiledModelCompatibility */ out_status);

// Delegate instance bound to the native function pointer in the NativeMethods static constructor.
public static DOrtGetModelCompatibilityForEpDevices OrtGetModelCompatibilityForEpDevices;

/// <summary>
/// Add execution provider devices to the session options.
/// Priority is based on the order of the OrtEpDevice instances. Highest priority first.
Expand Down
40 changes: 40 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/OrtEnv.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@

namespace Microsoft.ML.OnnxRuntime
{
/// <summary>
/// Represents the compatibility status of a pre-compiled model with one or more execution provider devices.
/// </summary>
/// <remarks>
/// This enum is used to determine whether a pre-compiled model can be used with specific execution providers
/// and devices, or if recompilation is needed.
///
/// The numeric values mirror the native OrtCompiledModelCompatibility enum declared in onnxruntime_c_api.h.
/// OrtEnv.GetModelCompatibilityForEpDevices casts the raw int returned by the native call directly to this
/// type, so the two declarations must be kept in sync.
/// </remarks>
public enum OrtCompiledModelCompatibility
{
EP_NOT_APPLICABLE = 0,
EP_SUPPORTED_OPTIMAL = 1,
EP_SUPPORTED_PREFER_RECOMPILATION = 2,
EP_UNSUPPORTED = 3,
}

/// <summary>
/// Delegate for logging function callback.
/// Supply your function and register it with the environment to receive logging callbacks via
Expand Down Expand Up @@ -361,6 +376,31 @@ public string[] GetAvailableProviders()
}
}

/// <summary>
/// Validate a compiled model's compatibility information for one or more EP devices.
/// </summary>
/// <param name="epDevices">The list of EP devices to validate against.
/// Must be non-null, non-empty and must not contain null entries.</param>
/// <param name="compatibilityInfo">The compatibility string from the precompiled model to validate.
/// Must not be null.</param>
/// <returns>OrtCompiledModelCompatibility enum value denoting the compatibility status</returns>
/// <exception cref="ArgumentException">
/// <paramref name="epDevices"/> is null, empty, or contains a null entry.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="compatibilityInfo"/> is null.</exception>
public OrtCompiledModelCompatibility GetModelCompatibilityForEpDevices(
    IReadOnlyList<OrtEpDevice> epDevices, string compatibilityInfo)
{
    // ArgumentException (rather than ArgumentNullException) is kept for a null list so that
    // callers asserting on the exact exception type keep working.
    if (epDevices == null || epDevices.Count == 0)
        throw new ArgumentException("epDevices must be non-empty", nameof(epDevices));

    // Fail with a parameter name the caller recognises instead of relying on whatever
    // the UTF-8 conversion helper does with a null string.
    if (compatibilityInfo == null)
        throw new ArgumentNullException(nameof(compatibilityInfo));

    var devicePtrs = new IntPtr[epDevices.Count];
    for (int i = 0; i < epDevices.Count; ++i)
    {
        var device = epDevices[i];
        if (device == null)
        {
            // Without this check a null entry surfaces as a NullReferenceException on .Handle.
            throw new ArgumentException($"epDevices[{i}] must not be null", nameof(epDevices));
        }

        devicePtrs[i] = device.Handle;
    }

    // The native API expects a zero-terminated UTF-8 string.
    var infoUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(compatibilityInfo);
    NativeApiStatus.VerifySuccess(
        NativeMethods.OrtGetModelCompatibilityForEpDevices(
            devicePtrs, (UIntPtr)devicePtrs.Length, infoUtf8, out int status));

    // The native enum value is returned as a raw int; the managed enum uses the same numbering.
    return (OrtCompiledModelCompatibility)status;
}


/// <summary>
/// Get/Set log level property of OrtEnv instance
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// not supported on mobile platforms
#if !(ANDROID || IOS)

namespace Microsoft.ML.OnnxRuntime.Tests;

using System;
using System.Linq;
using Xunit;
using System.Collections.Generic;

/// <summary>
/// Tests for OrtEnv.GetModelCompatibilityForEpDevices.
/// </summary>
public class EpCompatibilityTests
{
    private readonly OrtEnv ortEnvInstance = OrtEnv.Instance();

    /// <summary>
    /// Returns the EP devices reported by the environment, asserting that at least one exists.
    /// </summary>
    private IReadOnlyList<OrtEpDevice> GetDevices()
    {
        var epDevices = ortEnvInstance.GetEpDevices();
        Assert.NotNull(epDevices);
        Assert.NotEmpty(epDevices);
        return epDevices;
    }

    /// <summary>
    /// A null or empty device list must be rejected with ArgumentException.
    /// </summary>
    [Fact]
    public void GetEpCompatibility_InvalidArgs()
    {
        Assert.Throws<ArgumentException>(() => ortEnvInstance.GetModelCompatibilityForEpDevices(null, "info"));
        Assert.Throws<ArgumentException>(() => ortEnvInstance.GetModelCompatibilityForEpDevices(new List<OrtEpDevice>(), "info"));
    }

    /// <summary>
    /// Querying compatibility against the CPU EP device with an arbitrary string
    /// is expected to report EP_NOT_APPLICABLE.
    /// </summary>
    [Fact]
    public void GetEpCompatibility_SingleDeviceCpuProvider()
    {
        var devices = GetDevices();
        var someInfo = "arbitrary-compat-string";

        // Use CPU device.
        // FirstOrDefault (not First) so that a missing CPU device is reported by the
        // Assert.NotNull below instead of an InvalidOperationException thrown by LINQ.
        var cpu = devices.FirstOrDefault(d => d.EpName == "CPUExecutionProvider");
        Assert.NotNull(cpu);
        var selected = new List<OrtEpDevice> { cpu };
        var status = ortEnvInstance.GetModelCompatibilityForEpDevices(selected, someInfo);

        // CPU defaults to not applicable in this scenario
        Assert.Equal(OrtCompiledModelCompatibility.EP_NOT_APPLICABLE, status);
    }
}
#endif
26 changes: 10 additions & 16 deletions include/onnxruntime/core/graph/model_saving_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,30 @@ class PrepackedWeightsForGraph;

// These options affect how the model initializers are written to the external file.
// This includes options to align external initializer offset.
// For models running on CPU, ORT will try to use mmap to load external
// initializers. To use mmap, external initializer need to be offset aligned.
// ORT will try to use mmap to load external initializers.
//
// ORT saves external initializers into single data file, each initializer is
// accessed with offset(start position of initializer) and length(byte length of
// initializer) of the data file. To use mmap, each offset need to be aligned
// which means offset need to divisible by allocation granularity(64KB for
// windows and 4K for other OSes). With align_offset to true, ORT will align
// offset for large initializer when save ONNX model with external data file.
// initializer) of the data file. With align_offset set to true, ORT will align
// the offset of each large initializer (larger than align_threshold)
// when saving an ONNX model with an external data file. It will align them to
// the on_disk_alignment value.
struct ModelSavingOptions {
explicit ModelSavingOptions(size_t size_threshold)
: initializer_size_threshold(size_threshold) {}

// Minimal initializer size in bytes to be externalized on disk
size_t initializer_size_threshold;
// Offset will always be page aligned and allocation granularity aligned for
// mmap support. This is done by padding previous tensor data with zeros
// keeping same length.
// Offset will always be aligned for mmap support.
// This is done by padding previous tensor data with zeros keeping same length.
bool align_offset = false;
// Alignment threshold for size of data.
// Having a low threshold will waste file space for small initializers.
// Only when a tensor's data size is > align_threshold will it be force
// aligned. Defaults to 1MB.
int64_t align_threshold = 1048576;
// The allocation Granularity for mmap() support.
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
#ifdef _WIN32
int64_t allocation_granularity = 65536;
#else
int64_t allocation_granularity = 4096;
#endif
// Alignment factor for big tensors (bigger than align_threshold). Defaults to 4K.
int64_t on_disk_alignment = 4096;
// Force embed all external initializer into the Onnx file
// Used for EPContext model generation while some nodes fallback on CPU which has external data dependency
bool force_embed_external_ini = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ constexpr const char* kDetailedBuildLog = "nv_detailed_build_log";
constexpr const char* kProfilesMinShapes = "nv_profile_min_shapes";
constexpr const char* kProfilesMaxShapes = "nv_profile_max_shapes";
constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
constexpr const char* kCudaGraphEnable = "nv_cuda_graph_enable";
constexpr const char* kCudaGraphEnable = "enable_cuda_graph";
constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";

Expand Down
8 changes: 4 additions & 4 deletions include/onnxruntime/core/providers/utils/ort_graph_to_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ static Ort::Status GetOrtValueInfoTensorTypeShape(const OrtValueInfo& ort_value_
/*out*/ std::vector<int64_t>& dims,
/*out*/ std::vector<std::string>& symbolic_dims);
static Ort::Status OrtValueInfoToProto(const OrtValueInfo& ort_value_info, onnx::ValueInfoProto& value_info_proto);
static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto);
static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto);

Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
onnx::GraphProto& graph_proto,
Expand Down Expand Up @@ -379,7 +379,7 @@ Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
}

onnx::AttributeProto* attr_proto = node_proto->add_attribute();
ORT_EP_UTILS_CXX_RETURN_IF_ERROR(OrtOpAttrToProto(*ort_node, *ort_attr, *attr_proto));
ORT_EP_UTILS_CXX_RETURN_IF_ERROR(OrtOpAttrToProto(*ort_attr, *attr_proto));
}
}

Expand Down Expand Up @@ -652,7 +652,7 @@ static Ort::Status OrtValueInfoToProto(const OrtValueInfo& ort_value_info,
return Ort::Status{nullptr};
}

static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto) {
static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto) {
const OrtApi& ort_api = Ort::GetApi();

const char* attr_name = nullptr;
Expand Down Expand Up @@ -766,7 +766,7 @@ static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& or
// TensorProto as an attribute value doesn't require a name.

OrtValue* ort_value = nullptr;
ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.Node_GetTensorAttributeAsOrtValue(&ort_node, &ort_attr, &ort_value));
ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.OpAttr_GetTensorAttributeAsOrtValue(&ort_attr, &ort_value));

Ort::Value tensor(ort_value);

Expand Down
31 changes: 29 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,16 @@ typedef void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t n
*
* \nosubgrouping
*/
/*
* Public enum for compiled model compatibility across EPs.
*
* Used as the out_status result type of OrtApi::GetModelCompatibilityForEpDevices.
* Only the first enumerator has an explicit value; the rest take consecutive values (1, 2, 3).
* The C# binding declares an enum with the same numeric values, so changing the order here
* changes the values seen by that binding.
*
* NOTE(review): this declaration appears to sit between the Doxygen block that ends just above
* and `struct OrtApi` below, so that block may now attach to this enum - confirm.
*/
typedef enum OrtCompiledModelCompatibility {
OrtCompiledModelCompatibility_EP_NOT_APPLICABLE = 0,
OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL, /* 1 */
OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION, /* 2 */
OrtCompiledModelCompatibility_EP_UNSUPPORTED, /* 3 */
} OrtCompiledModelCompatibility;

struct OrtApi {
/// \name OrtStatus
/// @{
Expand Down Expand Up @@ -6069,7 +6079,6 @@ struct OrtApi {

/** \brief Get an OrtOpAttr's 'TENSOR' attribute value as an OrtValue.
*
* \param[in] node The OrtNode instance.
* \param[in] attribute The OrtOpAttr instance.
* \param[out] attr_tensor If successful, contains the 'TENSOR' attribute as a newly created OrtValue.
Must be freed with OrtApi::ReleaseValue.
Expand All @@ -6078,7 +6087,7 @@ struct OrtApi {
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Node_GetTensorAttributeAsOrtValue, _In_ const OrtNode* node, _In_ const OrtOpAttr* attribute,
ORT_API2_STATUS(OpAttr_GetTensorAttributeAsOrtValue, _In_ const OrtOpAttr* attribute,
_Outptr_result_maybenull_ OrtValue** attr_tensor);

/** \brief Get the attribute type as OrtOpAttrType from an OrtOpAttr.
Expand Down Expand Up @@ -6480,6 +6489,24 @@ struct OrtApi {
* \since Version 1.23.
*/
ORT_API2_STATUS(Graph_GetModelMetadata, _In_ const OrtGraph* graph, _Outptr_ OrtModelMetadata** out);

/** \brief Validate a compiled model's compatibility information for one or more EP devices.
*
* \param[in] ep_devices The EP devices to validate against (e.g., from GetEpDevices).
* All devices must belong to the same execution provider.
* \param[in] num_ep_devices The number of EP devices provided.
* \param[in] compatibility_info The compatibility info string produced when the model was compiled.
* \param[out] out_status The resulting compatibility status for the EP devices.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(GetModelCompatibilityForEpDevices,
_In_reads_(num_ep_devices) const OrtEpDevice* const* ep_devices,
_In_ size_t num_ep_devices,
_In_ const char* compatibility_info,
_Out_ OrtCompiledModelCompatibility* out_status);
};

/*
Expand Down
Loading
Loading