microsoft · snnn · Jul 30, 2025 · Jul 26, 2025 · Jul 28, 2025 · Jul 28, 2025
diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake
@@ -5,6 +5,8 @@ file(GLOB onnxruntime_session_srcs CONFIGURE_DEPENDS
     "${ONNXRUNTIME_INCLUDE_DIR}/core/session/*.h"
     "${ONNXRUNTIME_ROOT}/core/session/*.h"
     "${ONNXRUNTIME_ROOT}/core/session/*.cc"
+    "${ONNXRUNTIME_ROOT}/core/session/plugin_ep/*.h"
+    "${ONNXRUNTIME_ROOT}/core/session/plugin_ep/*.cc"
     )
 
 if (onnxruntime_ENABLE_TRAINING_APIS)
@@ -22,7 +24,7 @@ endif()
 # which is not enabled for any minimal builds.
 if (onnxruntime_MINIMAL_BUILD)
   file(GLOB autoep_srcs
-    "${ONNXRUNTIME_ROOT}/core/session/ep_*.*"
+    "${ONNXRUNTIME_ROOT}/core/session/plugin_ep/*.*"
   )
 
   set(onnxruntime_session_src_exclude

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Exceptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Exceptions.shared.cs
@@ -23,8 +23,8 @@ internal enum ErrorCode
         ModelLoaded = 8,
         NotImplemented = 9,
         InvalidGraph = 10,
-        ShapeInferenceNotRegistered = 11,
-        RequirementNotRegistered = 12,
+        ShapeInferenceNotRegistered = 11,  // TODO: should be ORT_EP_FAIL
+        RequirementNotRegistered = 12,     // TODO: should be ORT_MODEL_LOAD_CANCELED
     }
 
     /// <summary>

diff --git a/include/onnxruntime/core/common/status.h b/include/onnxruntime/core/common/status.h
@@ -46,6 +46,7 @@
   EP_FAIL = 11,
   MODEL_LOAD_CANCELED = 12,
   MODEL_REQUIRES_COMPILATION = 13,
+  NOT_FOUND = 14,
 };
 
 constexpr const char* StatusCodeToString(StatusCode status) noexcept {
@@ -78,6 +79,8 @@
       return "MODEL_LOAD_CANCELED";
     case StatusCode::MODEL_REQUIRES_COMPILATION:
       return "MODEL_REQUIRES_COMPILATION";
+    case StatusCode::NOT_FOUND:
+      return "NOT_FOUND";
     default:
       return "GENERAL ERROR";
   }
@@ -111,9 +114,11 @@
    case StatusCode::EP_FAIL:
      return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR);
    case StatusCode::MODEL_LOAD_CANCELED:
       return HRESULT_FROM_WIN32(ERROR_CANCELLED);
     case StatusCode::MODEL_REQUIRES_COMPILATION:
       return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
+    case StatusCode::NOT_FOUND:
+      return HRESULT_FROM_WIN32(ERROR_NOT_FOUND);
     default:
       return E_FAIL;
   }

diff --git a/include/onnxruntime/core/session/environment.h b/include/onnxruntime/core/session/environment.h
@@ -20,7 +20,7 @@
 #include "core/platform/threadpool.h"
 
 #include "core/session/abi_devices.h"
-#include "core/session/ep_library.h"
+#include "core/session/plugin_ep/ep_library.h"
 #include "core/session/onnxruntime_c_api.h"
 
 struct OrtThreadingOptions;

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -264,6 +264,7 @@ typedef enum OrtErrorCode {
   ORT_EP_FAIL,
   ORT_MODEL_LOAD_CANCELED,
   ORT_MODEL_REQUIRES_COMPILATION,
+  ORT_NOT_FOUND,
 } OrtErrorCode;
 
 typedef enum OrtOpAttrType {
@@ -5846,14 +5847,13 @@ struct OrtApi {
 
   /** \brief Returns an OrtGraph that contains a subset of nodes in the source OrtGraph.
    *
-   * Note:
-   * The lifetime of "dst_graph" is tied to that of "src_graph", as they both internally reference
+   * \note The lifetime of "dst_graph" is tied to that of "src_graph", as they both internally reference
    * the same underlying graph.
    *
    * \param[in] src_graph The source OrtGraph instance.
    * \param[in] nodes A subset of the nodes/OrtNodes in 'graph'.
    * \param[in] num_nodes Number of nodes.
-   * \param[out] dst_sub_graph An OrtGraph created from a given set of nodes. Must be released by calling ReleaseGraph.
+   * \param[out] dst_graph An OrtGraph created from a given set of nodes. Must be released by calling ReleaseGraph.
    *
    * \snippet{doc} snippets.dox OrtStatus Return Value
    *
@@ -6032,6 +6032,11 @@ struct OrtApi {
    *                           Typical usage sets this to the result of Node_GetNumAttributes(). An error status is
    *                           returned if `num_attributes` is less than the number of node attributes.
    *
+   * \note ONNX Runtime automatically sets optional (unset) attributes to their default values if the default value
+   * is a constant expression that does not depend on other tensor/model characteristics. Conv's 'kernel_shape'
+   * attribute is an example of an optional attribute that does not have a constant default value. This function
+   * does not provide any unset optional attributes without a constant default value.
+   *
    * \snippet{doc} snippets.dox OrtStatus Return Value
    *
    * \since Version 1.23.
@@ -6043,14 +6048,22 @@ struct OrtApi {
    *
    * \param[in] node The OrtNode instance.
    * \param[in] attribute_name The name of the attribute
-   * \param[out] attribute Output the attribute if its name matches 'attribute_name', otherwise output nullptr.
+   * \param[out] attribute Output parameter set to the OrtOpAttr instance if an attribute by the given name exists.
+   *                       For an unset optional attribute, `attribute` is set to NULL and a non-error status is
+   *                       returned. For an invalid attribute name, `attribute` is set to NULL and an error status with
+   *                       code ORT_NOT_FOUND is returned.
+   *
+   * \note ONNX Runtime automatically sets optional (unset) attributes to their default values if the default value
+   * is a constant expression that does not depend on other tensor/model characteristics. Conv's 'kernel_shape'
+   * attribute is an example of an optional attribute that does not have a constant default value. This function
+   * does not provide any unset optional attributes without a constant default value.
    *
    * \snippet{doc} snippets.dox OrtStatus Return Value
    *
    * \since Version 1.23.
    */
   ORT_API2_STATUS(Node_GetAttributeByName, _In_ const OrtNode* node, _In_ const char* attribute_name,
-                  _Outptr_ const OrtOpAttr** attribute);
+                  _Outptr_result_maybenull_ const OrtOpAttr** attribute);
 
   /** \brief Get the attribute type as OrtOpAttrType from an OrtOpAttr.
    *

diff --git a/java/src/main/java/ai/onnxruntime/OrtException.java b/java/src/main/java/ai/onnxruntime/OrtException.java
@@ -81,11 +81,17 @@ public enum OrtErrorCode {
     /** The ONNX graph is invalid. */
     ORT_INVALID_GRAPH(10),
     /** The ORT execution provider failed. */
-    ORT_EP_FAIL(11);
+    ORT_EP_FAIL(11),
+    /** Model load was canceled. */
+    ORT_MODEL_LOAD_CANCELED(12),
+    /** Model requires compilation. */
+    ORT_MODEL_REQUIRES_COMPILATION(13),
+    /** Item was not found. */
+    ORT_NOT_FOUND(14);
 
     private final int value;
 
-    private static final OrtErrorCode[] values = new OrtErrorCode[12];
+    private static final OrtErrorCode[] values = new OrtErrorCode[15];
 
     static {
       for (OrtErrorCode ot : OrtErrorCode.values()) {

diff --git a/java/src/main/native/OrtJniUtil.c b/java/src/main/native/OrtJniUtil.c
@@ -1051,6 +1051,12 @@ jint convertErrorCode(OrtErrorCode code) {
             return 10;
         case ORT_EP_FAIL:
             return 11;
+        case ORT_MODEL_LOAD_CANCELED:
+            return 12;
+        case ORT_MODEL_REQUIRES_COMPILATION:
+            return 13;
+        case ORT_NOT_FOUND:
+            return 14;
         default:
             return -1; // Unknown error code
     }

diff --git a/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc b/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include "core/common/cpuid_info.h"  // for CPUIDInfo::GetCPUIDInfo().HasArm_SME()
 #include "core/common/narrow.h"
 #include "core/common/safeint.h"
 #include "core/mlas/inc/mlas.h"
@@ -10,6 +11,7 @@
 #include "core/util/math_cpuonly.h"
 #include "core/util/qmath.h"
 
+#include <cassert>
 #include <algorithm>
 #include <vector>
 
@@ -169,43 +171,40 @@
     // only pack Matrix B
     if (input_idx == GetBIdx()) {
       const Tensor* b_zp_constant_tensor{nullptr};
-      bool b_quantization_is_asymmetric = false;
+      bool b_quantization_might_be_asymmetric = false;
 
-      // zero point tensor could be provided as a direct input to the kernel and not as a constant so this
-      // test is not sufficient
       const OrtValue* b_zp;
       if (Info().TryGetConstantInput(IN_B_ZERO_POINT, &b_zp)) {
         b_zp_constant_tensor = &b_zp->Get<Tensor>();
       }
 
-      // MlasDynamicQgemm requires symmetric quantization for B, so no zero point should exist or it should
-      // have a zero value
-      if (b_zp_constant_tensor != nullptr) {  // Covers the case where tensor is not a constant
-        const auto& shape = b_zp_constant_tensor->Shape();
-        const auto* zp_data = static_cast<const uint8_t*>(b_zp_constant_tensor->DataRaw());
-        size_t zp_size = static_cast<size_t>(shape.Size());
-        // MlasDynamicQgemm requires symmetric quantization: zp must be scalar 0 or 1D all-zero
-        if ((shape.NumDimensions() == 0) && (zp_data[0] == 0)) {
-          b_quantization_is_asymmetric = false;
-        } else if (shape.NumDimensions() == 1) {
-          b_quantization_is_asymmetric = false;
-          for (size_t i = 0; i < zp_size; ++i) {
-            if (zp_data[i] != 0) {
-              b_quantization_is_asymmetric = true;
-              break;
-            }
-          }
-        } else {
-          // Unsupported higher-rank zp tensor
-          b_quantization_is_asymmetric = true;
-        }
+      // MlasDynamicQgemm requires symmetric quantization for B, so the B zero point value should either be all zeros
+      // or not provided.
+      if (b_zp_constant_tensor != nullptr) {
+        // B zero point is constant. Check if it is all zeros.
+        assert(b_zp_constant_tensor->IsDataType<uint8_t>() || b_zp_constant_tensor->IsDataType<int8_t>());
+        const auto* zp_bytes = static_cast<const std::byte*>(b_zp_constant_tensor->DataRaw());
+        const size_t zp_size_in_bytes = b_zp_constant_tensor->SizeInBytes();
+        b_quantization_might_be_asymmetric = std::any_of(zp_bytes, zp_bytes + zp_size_in_bytes,
+                                                         [](std::byte v) { return v != std::byte{0}; });
+      } else {
+        // B zero point input is not constant. If it exists, we can't assume symmetric quantization.
+        const auto input_defs = Info().node().InputDefs();
+        const bool b_zp_input_exists = input_defs.size() > IN_B_ZERO_POINT && input_defs[IN_B_ZERO_POINT]->Exists();
+        b_quantization_might_be_asymmetric = b_zp_input_exists;
       }
 
       // MlasDynamicQgemm requires scale data to be available at packing stage
       const Tensor* b_scale_tensor = nullptr;
       const bool b_scale_available = Info().TryGetConstantInput(IN_B_SCALE, &b_scale_tensor);
 
-      can_use_dynamic_quant_mlas_ = (!b_quantization_is_asymmetric && b_scale_available);
+      can_use_dynamic_quant_mlas_ = (!b_quantization_might_be_asymmetric && b_scale_available);
+
+      // Currently, MlasDynamicQGemmBatch() and associated functions require SME or else they are no-ops.
+      // We check that here too before attempting to use them.
+      if (!CPUIDInfo::GetCPUIDInfo().HasArm_SME()) {
+        can_use_dynamic_quant_mlas_ = false;
+      }
 
       // Only handle the common case of a 2D weight matrix. Additional matrices
       // could be handled by stacking the packed buffers.

diff --git a/onnxruntime/core/graph/ep_api_types.cc b/onnxruntime/core/graph/ep_api_types.cc
@@ -87,6 +87,24 @@ static void ConvertNodeArgsToValueInfos(const EpGraph* ep_graph,
   }
 }
 
+#if !defined(ORT_MINIMAL_BUILD)
+static bool IsOptionalAttribute(const Node& node, const std::string& attr_name) {
+  const ONNX_NAMESPACE::OpSchema* op_schema = node.Op();
+  if (op_schema == nullptr) {
+    return false;  // No operator schema is available for this node, so optionality cannot be determined.
+  }
+  // Look up `attr_name` among the attributes declared by the operator schema.
+  auto attr_schema_iter = op_schema->attributes().find(attr_name);
+  if (attr_schema_iter == op_schema->attributes().end()) {
+    return false;  // Not an attribute for this operator type.
+  }
+
+  const ONNX_NAMESPACE::OpSchema::Attribute& attr_schema = attr_schema_iter->second;
+  // The attribute is optional exactly when the schema does not mark it as required.
+  return !attr_schema.required;
+}
+#endif  // !defined(ORT_MINIMAL_BUILD)
+
 //
 // EpNode
 //
@@ -268,13 +286,20 @@ gsl::span<const EpValueInfo* const> EpNode::GetOutputsSpan() const {
   return outputs_;
 }
 
-const OrtOpAttr* EpNode::GetAttribute(const std::string& name) const {
+const OrtOpAttr* EpNode::GetAttribute(const std::string& name, bool& is_unset_optional_attr) const {
   auto iter = attributes_map_.find(name);
-  if (iter == attributes_map_.end()) {
-    return nullptr;
-  } else {
+  if (iter != attributes_map_.end()) {
+    is_unset_optional_attr = false;  // The attribute is present on this node.
     return reinterpret_cast<const OrtOpAttr*>(iter->second.get());
   }
+  // Not present: report whether `name` refers to an optional attribute of this node's operator.
+#if !defined(ORT_MINIMAL_BUILD)
+  is_unset_optional_attr = IsOptionalAttribute(node_, name);
+#else
+  // Always false in a minimal build: without the operator schema, optional attributes cannot be identified.
+  is_unset_optional_attr = false;
+#endif  // !defined(ORT_MINIMAL_BUILD)
+  return nullptr;
 }
 
 const std::string& EpNode::GetEpName() const {

diff --git a/onnxruntime/core/graph/ep_api_types.h b/onnxruntime/core/graph/ep_api_types.h
@@ -209,8 +209,9 @@ struct EpNode : public OrtNode {
   // Helper that returns this node's outputs as a span of EpValueInfo pointers.
   gsl::span<const EpValueInfo* const> GetOutputsSpan() const;
 
-  // Helper that gets the node's attributes by name.
-  const OrtOpAttr* GetAttribute(const std::string& name) const;
+  // Helper that gets a node attribute by name. Returns NULL if the attribute is not set. `is_unset_optional_attr`
+  // is set to true only when NULL is returned for an optional attribute; it is always false in a minimal build.
+  const OrtOpAttr* GetAttribute(const std::string& name, bool& is_unset_optional_attr) const;
 
   // Helper that gets the execution provider name that this node is assigned to run on.
   const std::string& GetEpName() const;

diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc
@@ -16,10 +16,10 @@
 #include "core/session/abi_session_options_impl.h"
 #include "core/session/allocator_adapters.h"
 #include "core/session/inference_session.h"
-#include "core/session/ep_factory_internal.h"
-#include "core/session/ep_library_internal.h"
-#include "core/session/ep_library_plugin.h"
-#include "core/session/ep_library_provider_bridge.h"
+#include "core/session/plugin_ep/ep_factory_internal.h"
+#include "core/session/plugin_ep/ep_library_internal.h"
+#include "core/session/plugin_ep/ep_library_plugin.h"
+#include "core/session/plugin_ep/ep_library_provider_bridge.h"
 #include "core/session/ort_apis.h"
 #include "core/session/utils.h"