microsoft · snnn · Aug 29, 2025 · Aug 1, 2025 · Jul 31, 2025 · Jul 29, 2025
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -1226,6 +1226,12 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
       ${onnxruntime_perf_test_src_patterns}
       )
     onnxruntime_add_executable(onnxruntime_perf_test ${onnxruntime_perf_test_src} ${ONNXRUNTIME_ROOT}/core/platform/path_lib.cc)
+
+    # ABSL_FLAGS_STRIP_NAMES is set to 1 by default to disable flag registration when building for Android, iPhone, and "embedded devices".
+    # See the issue: https://github.com/abseil/abseil-cpp/issues/1875
+    # We set it to 0 for all builds to be able to use ABSL flags for onnxruntime_perf_test.
+    target_compile_definitions(onnxruntime_perf_test PRIVATE ABSL_FLAGS_STRIP_NAMES=0)
+
     if(MSVC)
       target_compile_options(onnxruntime_perf_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
             "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
@@ -368,6 +368,88 @@ public struct OrtApi
         public IntPtr EpDevice_Device;
         public IntPtr GetEpApi;
         public IntPtr GetTensorSizeInBytes;
+
+        public IntPtr AllocatorGetStats;
+
+        public IntPtr CreateMemoryInfo_V2;
+        public IntPtr MemoryInfoGetDeviceMemType;
+        public IntPtr MemoryInfoGetVendorId;
+
+        public IntPtr ValueInfo_GetValueProducer;
+        public IntPtr ValueInfo_GetValueNumConsumers;
+        public IntPtr ValueInfo_GetValueConsumers;
+        public IntPtr ValueInfo_GetInitializerValue;
+        public IntPtr ValueInfo_GetExternalInitializerInfo;
+        public IntPtr ValueInfo_IsRequiredGraphInput;
+        public IntPtr ValueInfo_IsOptionalGraphInput;
+        public IntPtr ValueInfo_IsGraphOutput;
+        public IntPtr ValueInfo_IsConstantInitializer;
+        public IntPtr ValueInfo_IsFromOuterScope;
+        public IntPtr Graph_GetName;
+        public IntPtr Graph_GetModelPath;
+        public IntPtr Graph_GetOnnxIRVersion;
+        public IntPtr Graph_GetNumOperatorSets;
+        public IntPtr Graph_GetOperatorSets;
+        public IntPtr Graph_GetNumInputs;
+        public IntPtr Graph_GetInputs;
+        public IntPtr Graph_GetNumOutputs;
+        public IntPtr Graph_GetOutputs;
+        public IntPtr Graph_GetNumInitializers;
+        public IntPtr Graph_GetInitializers;
+        public IntPtr Graph_GetNumNodes;
+        public IntPtr Graph_GetNodes;
+        public IntPtr Graph_GetParentNode;
+        public IntPtr Graph_GetGraphView;
+        public IntPtr Node_GetId;
+        public IntPtr Node_GetName;
+        public IntPtr Node_GetOperatorType;
+        public IntPtr Node_GetDomain;
+        public IntPtr Node_GetSinceVersion;
+        public IntPtr Node_GetNumInputs;
+        public IntPtr Node_GetInputs;
+        public IntPtr Node_GetNumOutputs;
+        public IntPtr Node_GetOutputs;
+        public IntPtr Node_GetNumImplicitInputs;
+        public IntPtr Node_GetImplicitInputs;
+        public IntPtr Node_GetNumAttributes;
+        public IntPtr Node_GetAttributes;
+        public IntPtr Node_GetAttributeByName;
+        public IntPtr Node_GetTensorAttributeAsOrtValue;
+        public IntPtr OpAttr_GetType;
+        public IntPtr OpAttr_GetName;
+        public IntPtr Node_GetNumSubgraphs;
+        public IntPtr Node_GetSubgraphs;
+        public IntPtr Node_GetGraph;
+        public IntPtr Node_GetEpName;
+        public IntPtr ReleaseExternalInitializerInfo;
+        public IntPtr ExternalInitializerInfo_GetFilePath;
+        public IntPtr ExternalInitializerInfo_GetFileOffset;
+        public IntPtr ExternalInitializerInfo_GetByteSize;
+
+        public IntPtr GetRunConfigEntry;
+
+        public IntPtr EpDevice_MemoryInfo;
+
+        public IntPtr CreateSharedAllocator;
+        public IntPtr GetSharedAllocator;
+        public IntPtr ReleaseSharedAllocator;
+
+        public IntPtr GetTensorData;
+
+        public IntPtr GetSessionOptionsConfigEntries;
+
+        public IntPtr SessionGetMemoryInfoForInputs;
+        public IntPtr SessionGetMemoryInfoForOutputs;
+        public IntPtr SessionGetEpDeviceForInputs;
+
+        public IntPtr CreateSyncStreamForEpDevice;
+        public IntPtr SyncStream_GetHandle;
+        public IntPtr ReleaseSyncStream;
+
+        public IntPtr CopyTensors;
+
+        public IntPtr Graph_GetModelMetadata;
+        public IntPtr GetModelCompatibilityForEpDevices;
     }
 
     internal static class NativeMethods
@@ -704,6 +786,10 @@ static NativeMethods()
                 (DSessionOptionsSetEpSelectionPolicyDelegate)Marshal.GetDelegateForFunctionPointer(
                     api_.SessionOptionsSetEpSelectionPolicyDelegate,
                     typeof(DSessionOptionsSetEpSelectionPolicyDelegate));
+
+            OrtGetModelCompatibilityForEpDevices = (DOrtGetModelCompatibilityForEpDevices)Marshal.GetDelegateForFunctionPointer(
+                api_.GetModelCompatibilityForEpDevices,
+                typeof(DOrtGetModelCompatibilityForEpDevices));
         }
 
         internal class NativeLib
@@ -2456,6 +2542,18 @@ public delegate void DOrtRemoveKeyValuePair(IntPtr /* OrtKeyValuePairs* */ kvps,
 
         public static DOrtGetEpDevices OrtGetEpDevices;
 
+        /// <summary>
+        /// Validate compiled model compatibility for the provided EP devices.
+        /// </summary>
+        [UnmanagedFunctionPointer(CallingConvention.Winapi)]
+        public delegate IntPtr /* OrtStatus* */ DOrtGetModelCompatibilityForEpDevices(
+            IntPtr[] /* const OrtEpDevice* const* */ ep_devices,
+            UIntPtr /* size_t */ num_ep_devices,
+            byte[] /* const char* */ compatibility_info,
+            out int /* OrtCompiledModelCompatibility */ out_status);
+
+        public static DOrtGetModelCompatibilityForEpDevices OrtGetModelCompatibilityForEpDevices;
+
         /// <summary>
         /// Add execution provider devices to the session options.
         /// Priority is based on the order of the OrtEpDevice instances. Highest priority first.

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtEnv.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtEnv.shared.cs
@@ -7,6 +7,21 @@
 
 namespace Microsoft.ML.OnnxRuntime
 {
+    /// <summary>
+    /// Represents the compatibility status of a pre-compiled model with one or more execution provider devices.
+    /// </summary>
+    /// <remarks>
+    /// This enum is used to determine whether a pre-compiled model can be used with specific execution providers
+    /// and devices, or if recompilation is needed.
+    /// </remarks>
+    public enum OrtCompiledModelCompatibility
+    {
+        EP_NOT_APPLICABLE = 0,
+        EP_SUPPORTED_OPTIMAL = 1,
+        EP_SUPPORTED_PREFER_RECOMPILATION = 2,
+        EP_UNSUPPORTED = 3,
+    }
+
     /// <summary>
     /// Delegate for logging function callback.
     /// Supply your function and register it with the environment to receive logging callbacks via
@@ -361,6 +376,31 @@ public string[] GetAvailableProviders()
             }
         }
 
+        /// <summary>
+        /// Validate a compiled model's compatibility information for one or more EP devices.
+        /// </summary>
+        /// <param name="epDevices">The list of EP devices to validate against. Must be non-null and non-empty; otherwise an ArgumentException is thrown.</param>
+        /// <param name="compatibilityInfo">The compatibility string from the precompiled model to validate.</param>
+        /// <returns>OrtCompiledModelCompatibility enum value denoting the compatibility status</returns>
+        public OrtCompiledModelCompatibility GetModelCompatibilityForEpDevices(
+            IReadOnlyList<OrtEpDevice> epDevices, string compatibilityInfo)
+        {
+            if (epDevices == null || epDevices.Count == 0)
+                throw new ArgumentException("epDevices must be non-empty", nameof(epDevices));
+
+            var devicePtrs = new IntPtr[epDevices.Count];
+            for (int i = 0; i < epDevices.Count; ++i)
+            {
+                devicePtrs[i] = epDevices[i].Handle;
+            }
+
+            var infoUtf8 = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(compatibilityInfo);
+            NativeApiStatus.VerifySuccess(
+                NativeMethods.OrtGetModelCompatibilityForEpDevices(
+                    devicePtrs, (UIntPtr)devicePtrs.Length, infoUtf8, out int status));
+            return (OrtCompiledModelCompatibility)status;
+        }
+
 
         /// <summary>
         /// Get/Set log level property of OrtEnv instance

diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/EpCompatibilityTests.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/EpCompatibilityTests.cs
@@ -0,0 +1,49 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// not supported on mobile platforms
+#if !(ANDROID || IOS)
+
+namespace Microsoft.ML.OnnxRuntime.Tests;
+
+using System;
+using System.Linq;
+using Xunit;
+using System.Collections.Generic;
+
+public class EpCompatibilityTests
+{
+    private readonly OrtEnv ortEnvInstance = OrtEnv.Instance();
+
+    private IReadOnlyList<OrtEpDevice> GetDevices()
+    {
+        var epDevices = ortEnvInstance.GetEpDevices();
+        Assert.NotNull(epDevices);
+        Assert.NotEmpty(epDevices);
+        return epDevices;
+    }
+
+    [Fact]
+    public void GetEpCompatibility_InvalidArgs()
+    {
+        Assert.Throws<ArgumentException>(() => ortEnvInstance.GetModelCompatibilityForEpDevices(null, "info"));
+        Assert.Throws<ArgumentException>(() => ortEnvInstance.GetModelCompatibilityForEpDevices(new List<OrtEpDevice>(), "info"));
+    }
+
+    [Fact]
+    public void GetEpCompatibility_SingleDeviceCpuProvider()
+    {
+        var devices = GetDevices();
+        var someInfo = "arbitrary-compat-string";
+
+        // Use the CPU device
+        var cpu = devices.First(d => d.EpName == "CPUExecutionProvider");
+        Assert.NotNull(cpu);
+        var selected = new List<OrtEpDevice> { cpu };
+        var status = ortEnvInstance.GetModelCompatibilityForEpDevices(selected, someInfo);
+
+        // CPU defaults to not applicable in this scenario
+        Assert.Equal(OrtCompiledModelCompatibility.EP_NOT_APPLICABLE, status);
+    }
+}
+#endif
diff --git a/include/onnxruntime/core/graph/model_saving_options.h b/include/onnxruntime/core/graph/model_saving_options.h
@@ -9,36 +9,30 @@ class PrepackedWeightsForGraph;
 
 // These options affect how the model initializers are written to the external file.
 // This includes options to align external initializer offset.
-// For models running on CPU, ORT will try to use mmap to load external
-// initializers. To use mmap, external initializer need to be offset aligned.
+// ORT will try to use mmap to load external initializers.
+//
 // ORT saves external initializers into single data file, each initializer is
 // accessed with offset(start position of initializer) and length(byte length of
-// initializer) of the data file. To use mmap, each offset need to be aligned
-// which means offset need to divisible by allocation granularity(64KB for
-// windows and 4K for other OSes). With align_offset to true, ORT will align
-// offset for large initializer when save ONNX model with external data file.
+// initializer) of the data file. With align_offset set to true, ORT will align
+// the offset of each large initializer (larger than align_threshold)
+// when saving an ONNX model with an external data file. Offsets are aligned
+// to the on_disk_alignment value.
 struct ModelSavingOptions {
   explicit ModelSavingOptions(size_t size_threshold)
       : initializer_size_threshold(size_threshold) {}
 
   // Minimal initializer size in bytes to be externalized on disk
   size_t initializer_size_threshold;
-  // Offset will always be page aligned and allocation granularity aligned for
-  // mmap support. This is done by padding previous tensor data with zeros
-  // keeping same length.
+  // Offset will always be aligned for mmap support.
+  // This is done by padding previous tensor data with zeros keeping same length.
   bool align_offset = false;
   // Alignment threshold for size of data.
   // Having a low threshold will waste file space for small initializers.
   // Only when tensor's data size is > the page_align_threshold it will be force
   // aligned. Default to 1MB.
   int64_t align_threshold = 1048576;
-  // The allocation Granularity for mmap() support.
-  // Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
-#ifdef _WIN32
-  int64_t allocation_granularity = 65536;
-#else
-  int64_t allocation_granularity = 4096;
-#endif
+  // Alignment factor for big tensors (bigger than align_threshold). Defaults to 4K.
+  int64_t on_disk_alignment = 4096;
   // Force embed all external initializer into the Onnx file
   // Used for EPContext model generation while some nodes fallback on CPU which has external data dependency
   bool force_embed_external_ini = false;

diff --git a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
@@ -31,7 +31,7 @@ constexpr const char* kDetailedBuildLog = "nv_detailed_build_log";
 constexpr const char* kProfilesMinShapes = "nv_profile_min_shapes";
 constexpr const char* kProfilesMaxShapes = "nv_profile_max_shapes";
 constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
-constexpr const char* kCudaGraphEnable = "nv_cuda_graph_enable";
+constexpr const char* kCudaGraphEnable = "enable_cuda_graph";
 constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
 constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";
 

diff --git a/include/onnxruntime/core/providers/utils/ort_graph_to_proto.h b/include/onnxruntime/core/providers/utils/ort_graph_to_proto.h
@@ -232,7 +232,7 @@ static Ort::Status GetOrtValueInfoTensorTypeShape(const OrtValueInfo& ort_value_
                                                   /*out*/ std::vector<int64_t>& dims,
                                                   /*out*/ std::vector<std::string>& symbolic_dims);
 static Ort::Status OrtValueInfoToProto(const OrtValueInfo& ort_value_info, onnx::ValueInfoProto& value_info_proto);
-static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto);
+static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto);
 
 Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
                             onnx::GraphProto& graph_proto,
@@ -379,7 +379,7 @@ Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
         }
 
         onnx::AttributeProto* attr_proto = node_proto->add_attribute();
-        ORT_EP_UTILS_CXX_RETURN_IF_ERROR(OrtOpAttrToProto(*ort_node, *ort_attr, *attr_proto));
+        ORT_EP_UTILS_CXX_RETURN_IF_ERROR(OrtOpAttrToProto(*ort_attr, *attr_proto));
       }
     }
 
@@ -652,7 +652,7 @@ static Ort::Status OrtValueInfoToProto(const OrtValueInfo& ort_value_info,
   return Ort::Status{nullptr};
 }
 
-static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto) {
+static Ort::Status OrtOpAttrToProto(const OrtOpAttr& ort_attr, onnx::AttributeProto& attr_proto) {
   const OrtApi& ort_api = Ort::GetApi();
 
   const char* attr_name = nullptr;
@@ -766,7 +766,7 @@ static Ort::Status OrtOpAttrToProto(const OrtNode& ort_node, const OrtOpAttr& or
       // TensorProto as an attribute value doesn't require a name.
 
       OrtValue* ort_value = nullptr;
-      ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.Node_GetTensorAttributeAsOrtValue(&ort_node, &ort_attr, &ort_value));
+      ORT_EP_UTILS_C_RETURN_IF_ERROR(ort_api.OpAttr_GetTensorAttributeAsOrtValue(&ort_attr, &ort_value));
 
       Ort::Value tensor(ort_value);
 

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -902,6 +902,16 @@ typedef void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t n
  *
  * \nosubgrouping
  */
+/*
+ * Compatibility status of a compiled model with respect to one or more EP devices.
+ */
+typedef enum OrtCompiledModelCompatibility {
+  OrtCompiledModelCompatibility_EP_NOT_APPLICABLE = 0,
+  OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL,
+  OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION,
+  OrtCompiledModelCompatibility_EP_UNSUPPORTED,
+} OrtCompiledModelCompatibility;
+
 struct OrtApi {
   /// \name OrtStatus
   /// @{
@@ -6069,7 +6079,6 @@ struct OrtApi {
 
   /** \brief Get the OrtNode's 'TENSOR' attribute as an OrtValue.
    *
-   * \param[in] node The OrtNode instance.
    * \param[in] attribute The OrtOpAttr instance.
    * \param[out] attr_tensor If successful, contains the 'TENSOR' attribute as a newly created OrtValue.
                              Must be freed with OrtApi::ReleaseValue.
@@ -6078,7 +6087,7 @@ struct OrtApi {
    *
    * \since Version 1.23.
    */
-  ORT_API2_STATUS(Node_GetTensorAttributeAsOrtValue, _In_ const OrtNode* node, _In_ const OrtOpAttr* attribute,
+  ORT_API2_STATUS(OpAttr_GetTensorAttributeAsOrtValue, _In_ const OrtOpAttr* attribute,
                   _Outptr_result_maybenull_ OrtValue** attr_tensor);
 
   /** \brief Get the attribute type as OrtOpAttrType from an OrtOpAttr.
@@ -6480,6 +6489,24 @@ struct OrtApi {
    * \since Version 1.23.
    */
   ORT_API2_STATUS(Graph_GetModelMetadata, _In_ const OrtGraph* graph, _Outptr_ OrtModelMetadata** out);
+
+  /** \brief Validate a compiled model's compatibility information for one or more EP devices.
+   *
+   * \param[in] ep_devices The EP devices to validate against (e.g., from GetEpDevices).
+   *                        All devices must belong to the same execution provider.
+   * \param[in] num_ep_devices The number of EP devices provided.
+   * \param[in] compatibility_info The compatibility info string produced when the model was compiled.
+   * \param[out] out_status The resulting compatibility status for the EP devices.
+   *
+   * \snippet{doc} snippets.dox OrtStatus Return Value
+   *
+   * \since Version 1.23.
+   */
+  ORT_API2_STATUS(GetModelCompatibilityForEpDevices,
+                  _In_reads_(num_ep_devices) const OrtEpDevice* const* ep_devices,
+                  _In_ size_t num_ep_devices,
+                  _In_ const char* compatibility_info,
+                  _Out_ OrtCompiledModelCompatibility* out_status);
 };
 
 /*