Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmake/onnxruntime_session.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ file(GLOB onnxruntime_session_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_INCLUDE_DIR}/core/session/*.h"
"${ONNXRUNTIME_ROOT}/core/session/*.h"
"${ONNXRUNTIME_ROOT}/core/session/*.cc"
"${ONNXRUNTIME_ROOT}/core/session/plugin_ep/*.h"
"${ONNXRUNTIME_ROOT}/core/session/plugin_ep/*.cc"
)

if (onnxruntime_ENABLE_TRAINING_APIS)
Expand All @@ -22,7 +24,7 @@ endif()
# which is not enabled for any minimal builds.
if (onnxruntime_MINIMAL_BUILD)
file(GLOB autoep_srcs
"${ONNXRUNTIME_ROOT}/core/session/ep_*.*"
"${ONNXRUNTIME_ROOT}/core/session/plugin_ep/*.*"
)

set(onnxruntime_session_src_exclude
Expand Down
4 changes: 2 additions & 2 deletions csharp/src/Microsoft.ML.OnnxRuntime/Exceptions.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ internal enum ErrorCode
ModelLoaded = 8,
NotImplemented = 9,
InvalidGraph = 10,
ShapeInferenceNotRegistered = 11,
RequirementNotRegistered = 12,
ShapeInferenceNotRegistered = 11, // TODO: should be ORT_EP_FAIL
RequirementNotRegistered = 12, // TODO: should be ORT_MODEL_LOAD_CANCELED
}

/// <summary>
Expand Down
5 changes: 5 additions & 0 deletions include/onnxruntime/core/common/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
EP_FAIL = 11,
MODEL_LOAD_CANCELED = 12,
MODEL_REQUIRES_COMPILATION = 13,
NOT_FOUND = 14,
};

constexpr const char* StatusCodeToString(StatusCode status) noexcept {
Expand Down Expand Up @@ -78,6 +79,8 @@
return "MODEL_LOAD_CANCELED";
case StatusCode::MODEL_REQUIRES_COMPILATION:
return "MODEL_REQUIRES_COMPILATION";
case StatusCode::NOT_FOUND:
return "NOT_FOUND";
default:
return "GENERAL ERROR";
}
Expand Down Expand Up @@ -111,9 +114,11 @@
case StatusCode::EP_FAIL:
return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR);
case StatusCode::MODEL_LOAD_CANCELED:
return HRESULT_FROM_WIN32(ERROR_CANCELLED);

Check warning on line 117 in include/onnxruntime/core/common/status.h

View workflow job for this annotation

GitHub Actions / Optional Lint

[misspell] reported by reviewdog 🐶 "CANCELLED" is a misspelling of "CANCELED" Raw Output: ./include/onnxruntime/core/common/status.h:117:38: "CANCELLED" is a misspelling of "CANCELED"
case StatusCode::MODEL_REQUIRES_COMPILATION:
return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
case StatusCode::NOT_FOUND:
return HRESULT_FROM_WIN32(ERROR_NOT_FOUND);
default:
return E_FAIL;
}
Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "core/platform/threadpool.h"

#include "core/session/abi_devices.h"
#include "core/session/ep_library.h"
#include "core/session/plugin_ep/ep_library.h"
#include "core/session/onnxruntime_c_api.h"

struct OrtThreadingOptions;
Expand Down
23 changes: 18 additions & 5 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ typedef enum OrtErrorCode {
ORT_EP_FAIL,
ORT_MODEL_LOAD_CANCELED,
ORT_MODEL_REQUIRES_COMPILATION,
ORT_NOT_FOUND,
} OrtErrorCode;

typedef enum OrtOpAttrType {
Expand Down Expand Up @@ -5846,14 +5847,13 @@ struct OrtApi {

/** \brief Returns an OrtGraph that contains a subset of nodes in the source OrtGraph.
*
* Note:
* The lifetime of "dst_graph" is tied to that of "src_graph", as they both internally reference
* \note The lifetime of "dst_graph" is tied to that of "src_graph", as they both internally reference
* the same underlying graph.
*
* \param[in] src_graph The source OrtGraph instance.
* \param[in] nodes A subset of the nodes/OrtNodes in 'src_graph'.
* \param[in] num_nodes Number of nodes.
* \param[out] dst_sub_graph An OrtGraph created from a given set of nodes. Must be released by calling ReleaseGraph.
* \param[out] dst_graph An OrtGraph created from a given set of nodes. Must be released by calling ReleaseGraph.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
Expand Down Expand Up @@ -6032,6 +6032,11 @@ struct OrtApi {
* Typical usage sets this to the result of Node_GetNumAttributes(). An error status is
* returned if `num_attributes` is less than the number of node attributes.
*
* \note ONNX Runtime automatically sets optional (unset) attributes to their default values if the default value
* is a constant expression that does not depend on other tensor/model characteristics. Conv's 'kernel_shape'
* attribute is an example of an optional attribute that does not have a constant default value. This function
* does not provide any unset optional attributes without a constant default value.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
Expand All @@ -6043,14 +6048,22 @@ struct OrtApi {
*
* \param[in] node The OrtNode instance.
* \param[in] attribute_name The name of the attribute
* \param[out] attribute Output the attribute if its name matches 'attribute_name', otherwise output nullptr.
* \param[out] attribute Output parameter set to the OrtOpAttr instance if an attribute by the given name exists.
* For an unset optional attribute, `attribute` is set to NULL and a non-error status is
* returned. For an invalid attribute name, `attribute` is set to NULL and an error status with
* code ORT_NOT_FOUND is returned.
*
* \note ONNX Runtime automatically sets optional (unset) attributes to their default values if the default value
* is a constant expression that does not depend on other tensor/model characteristics. Conv's 'kernel_shape'
* attribute is an example of an optional attribute that does not have a constant default value. This function
* does not provide any unset optional attributes without a constant default value.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Node_GetAttributeByName, _In_ const OrtNode* node, _In_ const char* attribute_name,
_Outptr_ const OrtOpAttr** attribute);
_Outptr_result_maybenull_ const OrtOpAttr** attribute);

/** \brief Get the attribute type as OrtOpAttrType from an OrtOpAttr.
*
Expand Down
10 changes: 8 additions & 2 deletions java/src/main/java/ai/onnxruntime/OrtException.java
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,17 @@ public enum OrtErrorCode {
/** The ONNX graph is invalid. */
ORT_INVALID_GRAPH(10),
/** The ORT execution provider failed. */
ORT_EP_FAIL(11);
ORT_EP_FAIL(11),
/** Model load was canceled. */
ORT_MODEL_LOAD_CANCELED(12),
/** Model requires compilation. */
ORT_MODEL_REQUIRES_COMPILATION(13),
/** Item was not found. */
ORT_NOT_FOUND(14);

private final int value;

private static final OrtErrorCode[] values = new OrtErrorCode[12];
private static final OrtErrorCode[] values = new OrtErrorCode[15];

static {
for (OrtErrorCode ot : OrtErrorCode.values()) {
Expand Down
6 changes: 6 additions & 0 deletions java/src/main/native/OrtJniUtil.c
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,12 @@ jint convertErrorCode(OrtErrorCode code) {
return 10;
case ORT_EP_FAIL:
return 11;
case ORT_MODEL_LOAD_CANCELED:
return 12;
case ORT_MODEL_REQUIRES_COMPILATION:
return 13;
case ORT_NOT_FOUND:
return 14;
default:
return -1; // Unknown error code
}
Expand Down
49 changes: 24 additions & 25 deletions onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/common/cpuid_info.h" // for CPUIDInfo::GetCPUIDInfo().HasArm_SME()
#include "core/common/narrow.h"
#include "core/common/safeint.h"
#include "core/mlas/inc/mlas.h"
Expand All @@ -10,6 +11,7 @@
#include "core/util/math_cpuonly.h"
#include "core/util/qmath.h"

#include <cassert>

Check warning on line 14 in onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Found C++ system header after other header. Should be: dynamic_quantize_matmul.h, c system, c++ system, other. [build/include_order] [4] Raw Output: onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc:14: Found C++ system header after other header. Should be: dynamic_quantize_matmul.h, c system, c++ system, other. [build/include_order] [4]
#include <algorithm>
#include <vector>

Expand Down Expand Up @@ -169,43 +171,40 @@
// only pack Matrix B
if (input_idx == GetBIdx()) {
const Tensor* b_zp_constant_tensor{nullptr};
bool b_quantization_is_asymmetric = false;
bool b_quantization_might_be_asymmetric = false;

// zero point tensor could be provided as a direct input to the kernel and not as a constant so this
// test is not sufficient
const OrtValue* b_zp;
if (Info().TryGetConstantInput(IN_B_ZERO_POINT, &b_zp)) {
b_zp_constant_tensor = &b_zp->Get<Tensor>();
}

// MlasDynamicQgemm requires symmetric quantization for B, so no zero point should exist or it should
// have a zero value
if (b_zp_constant_tensor != nullptr) { // Covers the case where tensor is not a constant
const auto& shape = b_zp_constant_tensor->Shape();
const auto* zp_data = static_cast<const uint8_t*>(b_zp_constant_tensor->DataRaw());
size_t zp_size = static_cast<size_t>(shape.Size());
// MlasDynamicQgemm requires symmetric quantization: zp must be scalar 0 or 1D all-zero
if ((shape.NumDimensions() == 0) && (zp_data[0] == 0)) {
b_quantization_is_asymmetric = false;
} else if (shape.NumDimensions() == 1) {
b_quantization_is_asymmetric = false;
for (size_t i = 0; i < zp_size; ++i) {
if (zp_data[i] != 0) {
b_quantization_is_asymmetric = true;
break;
}
}
} else {
// Unsupported higher-rank zp tensor
b_quantization_is_asymmetric = true;
}
// MlasDynamicQgemm requires symmetric quantization for B, so the B zero point value should either be all zeros
// or not provided.
if (b_zp_constant_tensor != nullptr) {
// B zero point is constant. Check if it is all zeros.
assert(b_zp_constant_tensor->IsDataType<uint8_t>() || b_zp_constant_tensor->IsDataType<int8_t>());
const auto* zp_bytes = static_cast<const std::byte*>(b_zp_constant_tensor->DataRaw());
const size_t zp_size_in_bytes = b_zp_constant_tensor->SizeInBytes();
b_quantization_might_be_asymmetric = std::any_of(zp_bytes, zp_bytes + zp_size_in_bytes,
[](std::byte v) { return v != std::byte{0}; });
} else {
// B zero point input is not constant. If it exists, we can't assume symmetric quantization.
const auto input_defs = Info().node().InputDefs();
const bool b_zp_input_exists = input_defs.size() > IN_B_ZERO_POINT && input_defs[IN_B_ZERO_POINT]->Exists();
b_quantization_might_be_asymmetric = b_zp_input_exists;
}

// MlasDynamicQgemm requires scale data to be available at packing stage
const Tensor* b_scale_tensor = nullptr;
const bool b_scale_available = Info().TryGetConstantInput(IN_B_SCALE, &b_scale_tensor);

can_use_dynamic_quant_mlas_ = (!b_quantization_is_asymmetric && b_scale_available);
can_use_dynamic_quant_mlas_ = (!b_quantization_might_be_asymmetric && b_scale_available);

// Currently, MlasDynamicQGemmBatch() and associated functions require SME or else they are no-ops.
// We check that here too before attempting to use them.
if (!CPUIDInfo::GetCPUIDInfo().HasArm_SME()) {
can_use_dynamic_quant_mlas_ = false;
}

// Only handle the common case of a 2D weight matrix. Additional matrices
// could be handled by stacking the packed buffers.
Expand Down
33 changes: 29 additions & 4 deletions onnxruntime/core/graph/ep_api_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,24 @@ static void ConvertNodeArgsToValueInfos(const EpGraph* ep_graph,
}
}

#if !defined(ORT_MINIMAL_BUILD)
static bool IsOptionalAttribute(const Node& node, const std::string& attr_name) {
const ONNX_NAMESPACE::OpSchema* op_schema = node.Op();
if (op_schema == nullptr) {
return false;
}

auto attr_schema_iter = op_schema->attributes().find(attr_name);
if (attr_schema_iter == op_schema->attributes().end()) {
return false; // Not an attribute for this operator type.
}

const ONNX_NAMESPACE::OpSchema::Attribute& attr_schema = attr_schema_iter->second;

return !attr_schema.required;
}
#endif // !defined(ORT_MINIMAL_BUILD)

//
// EpNode
//
Expand Down Expand Up @@ -268,13 +286,20 @@ gsl::span<const EpValueInfo* const> EpNode::GetOutputsSpan() const {
return outputs_;
}

const OrtOpAttr* EpNode::GetAttribute(const std::string& name) const {
const OrtOpAttr* EpNode::GetAttribute(const std::string& name, bool& is_unset_optional_attr) const {
auto iter = attributes_map_.find(name);
if (iter == attributes_map_.end()) {
return nullptr;
} else {
if (iter != attributes_map_.end()) {
is_unset_optional_attr = false;
return reinterpret_cast<const OrtOpAttr*>(iter->second.get());
}

#if !defined(ORT_MINIMAL_BUILD)
is_unset_optional_attr = IsOptionalAttribute(node_, name);
#else
// This is not properly set in a minimal build because it does not have access to the operator schema.
is_unset_optional_attr = false;
#endif // !defined(ORT_MINIMAL_BUILD)
return nullptr;
}

const std::string& EpNode::GetEpName() const {
Expand Down
5 changes: 3 additions & 2 deletions onnxruntime/core/graph/ep_api_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,9 @@ struct EpNode : public OrtNode {
// Helper that returns this node's outputs as a span of EpValueInfo pointers.
gsl::span<const EpValueInfo* const> GetOutputsSpan() const;

// Helper that gets the node's attributes by name.
const OrtOpAttr* GetAttribute(const std::string& name) const;
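// Helper that gets one of the node's attributes by name. If the attribute is set, returns it and sets the output
// parameter `is_unset_optional_attr` to false. If the attribute is not set, returns NULL and sets
// `is_unset_optional_attr` to true only if this is an unset optional attribute (always false in a minimal build).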
const OrtOpAttr* GetAttribute(const std::string& name, bool& is_unset_optional_attr) const;

// Helper that gets the execution provider name that this node is assigned to run on.
const std::string& GetEpName() const;
Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/core/session/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
#include "core/session/abi_session_options_impl.h"
#include "core/session/allocator_adapters.h"
#include "core/session/inference_session.h"
#include "core/session/ep_factory_internal.h"
#include "core/session/ep_library_internal.h"
#include "core/session/ep_library_plugin.h"
#include "core/session/ep_library_provider_bridge.h"
#include "core/session/plugin_ep/ep_factory_internal.h"
#include "core/session/plugin_ep/ep_library_internal.h"
#include "core/session/plugin_ep/ep_library_plugin.h"
#include "core/session/plugin_ep/ep_library_provider_bridge.h"
#include "core/session/ort_apis.h"
#include "core/session/utils.h"

Expand Down
Loading
Loading