Merged
Changes from 1 commit (34 commits in this pull request)
b4d0902
CXX EP related API beings
yuslepukhin Aug 12, 2025
2da60c0
XX
yuslepukhin Aug 14, 2025
c318e4b
Merge branch 'main' into yuslepukhin/cxx_ep_api
yuslepukhin Aug 14, 2025
bd82a8e
CPU build passes
yuslepukhin Aug 15, 2025
d9dafb7
Add C++ wrapper for CreateArenaCfgV2
yuslepukhin Aug 15, 2025
1f0c4fa
Fix up subscript
yuslepukhin Aug 15, 2025
c9adcf8
Address build errors, add coverage
yuslepukhin Aug 15, 2025
dad4309
Fix function name typo
yuslepukhin Aug 15, 2025
d0e47ef
Remove unused
yuslepukhin Aug 15, 2025
8c1489c
Merge branch 'yuslepukhin/cxx_ep_api' of https://github.com/microsoft…
yuslepukhin Aug 15, 2025
4eb14de
Remove stray include introduced by AI
yuslepukhin Aug 15, 2025
bd7cc76
Remove unused var
yuslepukhin Aug 18, 2025
6bc99e4
Start Phase II
yuslepukhin Aug 20, 2025
b21cd42
Merge branch 'main' into yuslepukhin/cxx_api_phase_ii
yuslepukhin Aug 22, 2025
4359e4b
Remove duplicate definition
yuslepukhin Aug 22, 2025
32e8b90
Fix a long standing bug on file memory mapping on windows.
yuslepukhin Aug 22, 2025
4a30754
Finish OpAttr
yuslepukhin Aug 26, 2025
c2c4d93
ValueInfo in progress
yuslepukhin Aug 26, 2025
3f9a19d
Refactor ValueInfo
yuslepukhin Aug 27, 2025
2c23fc2
Merge branch 'main' into yuslepukhin/cxx_api_phase_ii
yuslepukhin Aug 27, 2025
192e662
Refactor for new ValueInfo and plugin EP example
yuslepukhin Aug 27, 2025
a32d9da
Finish Node impl
yuslepukhin Aug 28, 2025
26398e3
Graph compiles
yuslepukhin Aug 28, 2025
c5dfab9
Add coverage, refactor tests
yuslepukhin Aug 29, 2025
feff549
Address test failuers
yuslepukhin Aug 29, 2025
100373e
Merge branch 'main' into yuslepukhin/cxx_api_phase_ii
yuslepukhin Aug 29, 2025
6e46299
Fix compile issues
yuslepukhin Aug 29, 2025
5ae1662
Address Copilot comments
yuslepukhin Aug 29, 2025
e0224a3
Fix CI errors
yuslepukhin Aug 29, 2025
c3a6a4f
Fix CI errors
yuslepukhin Aug 29, 2025
d40db18
CI errors
yuslepukhin Aug 29, 2025
c056654
CI issues
yuslepukhin Aug 29, 2025
e1d2f28
Address review comemnts
yuslepukhin Sep 2, 2025
f854bd4
Merge branch 'main' into yuslepukhin/cxx_api_phase_ii
yuslepukhin Sep 2, 2025
CPU build passes
yuslepukhin committed Aug 15, 2025
commit bd82a8eb81996dd01d121e40e435edc3feb7f1da
31 changes: 31 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -575,12 +575,14 @@ ORT_DEFINE_RELEASE(Node);
ORT_DEFINE_RELEASE(Graph);
ORT_DEFINE_RELEASE(Model);
ORT_DEFINE_RELEASE(KeyValuePairs);
ORT_DEFINE_RELEASE(PrepackedWeightsContainer);
ORT_DEFINE_RELEASE_FROM_API_STRUCT(ModelCompilationOptions, GetCompileApi);
ORT_DEFINE_RELEASE_FROM_API_STRUCT(EpDevice, GetEpApi);

// This is defined explicitly since OrtTensorRTProviderOptionsV2 is not a C API type,
// but the struct has V2 in its name to indicate that it is the second version of the options.
inline void OrtRelease(OrtTensorRTProviderOptionsV2* ptr) { GetApi().ReleaseTensorRTProviderOptions(ptr); }
inline void OrtRelease(OrtCUDAProviderOptionsV2* ptr) { GetApi().ReleaseCUDAProviderOptions(ptr); }

#undef ORT_DEFINE_RELEASE
#undef ORT_DEFINE_RELEASE_FROM_API_STRUCT
@@ -704,6 +706,7 @@ struct Model;
struct Node;
struct ModelMetadata;
struct TypeInfo;
struct PrepackedWeightsContainer;
struct Session;
struct SessionOptions;
struct SyncStream;
@@ -784,6 +787,33 @@ struct TensorRTProviderOptions : detail::Base<OrtTensorRTProviderOptionsV2> {
std::string GetTensorRTProviderOptionsAsString() const;
};

/** \brief The CUDAProviderOptions (V2)
*
* Used to pass options to the CUDA EP.
*/

struct CUDAProviderOptions : detail::Base<OrtCUDAProviderOptionsV2> {
explicit CUDAProviderOptions(std::nullptr_t) {} ///< No instance is created
/// \brief Wraps OrtApi::CreateCUDAProviderOptions
CUDAProviderOptions();
/// \brief Wraps OrtApi::UpdateCUDAProviderOptions
void Update(const std::unordered_map<std::string, std::string>& options);
/// \brief Wraps OrtApi::GetCUDAProviderOptionsAsString
std::string GetCUDAProviderOptionsAsString() const;
/// \brief Wraps OrtApi::UpdateCUDAProviderOptionsWithValue
void UpdateWithValue(const char* key, void* value);
/// \brief Wraps OrtApi::GetCUDAProviderOptionsByName
void* GetOptionByName(const char* name) const;
};

struct PrepackedWeightsContainer : detail::Base<OrtPrepackedWeightsContainer> {
using Base = detail::Base<OrtPrepackedWeightsContainer>;
explicit PrepackedWeightsContainer(std::nullptr_t) {} ///< No instance is created
explicit PrepackedWeightsContainer(OrtPrepackedWeightsContainer* p) : Base{p} {} ///< Take ownership of a pointer created by C API
/// \brief Wraps OrtApi::CreatePrepackedWeightsContainer
PrepackedWeightsContainer();
};

namespace detail {
template <typename T>
struct KeyValuePairsImpl : Ort::detail::Base<T> {
@@ -1230,6 +1260,7 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {
const std::vector<char*>& external_initializer_file_buffer_array,
const std::vector<size_t>& external_initializer_file_lengths); ///< Wraps OrtApi::AddExternalInitializersFromFilesInMemory

SessionOptionsImpl& AppendExecutionProvider_CPU(int use_arena); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CPU
SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA
SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2
SessionOptionsImpl& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM
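For orientation, a minimal sketch of how the wrapper declared above is meant to be used from application code. This is an illustration only: the model path is a placeholder, the option values are examples, and it assumes a build with the CUDA EP enabled (UpdateWithValue/GetOptionByName cover pointer-valued options such as a user compute stream and are not shown).

```cpp
// Sketch only: configure the CUDA EP via the new Ort::CUDAProviderOptions wrapper
// instead of calling OrtApi::CreateCUDAProviderOptions/UpdateCUDAProviderOptions directly.
#include "onnxruntime_cxx_api.h"

#include <string>
#include <unordered_map>

Ort::Session CreateCudaSession(Ort::Env& env, const ORTCHAR_T* model_path /* placeholder */) {
  Ort::CUDAProviderOptions cuda_options;  // wraps CreateCUDAProviderOptions; released automatically
  cuda_options.Update({{"device_id", "0"},
                       {"cudnn_conv_algo_search", "EXHAUSTIVE"}});  // throws Ort::Exception on a bad key

  Ort::SessionOptions session_options;
  // operator* goes through the wrapper's conversion to OrtCUDAProviderOptionsV2*.
  session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);

  return Ort::Session{env, model_path, session_options};
}
```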
44 changes: 44 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -522,6 +522,44 @@ inline std::string TensorRTProviderOptions::GetTensorRTProviderOptionsAsString()
return std::string(options_str);
}

inline CUDAProviderOptions::CUDAProviderOptions() {
ThrowOnError(GetApi().CreateCUDAProviderOptions(&this->p_));
}

inline void CUDAProviderOptions::Update(const std::unordered_map<std::string, std::string>& options) {
std::vector<const char*> keys;
std::vector<const char*> values;
keys.reserve(options.size());
values.reserve(options.size());
for (const auto& kv : options) {
keys.push_back(kv.first.c_str());
values.push_back(kv.second.c_str());
}
ThrowOnError(GetApi().UpdateCUDAProviderOptions(p_, keys.data(), values.data(), options.size()));
}

inline std::string CUDAProviderOptions::GetCUDAProviderOptionsAsString() const {
AllocatorWithDefaultOptions allocator;
char* options_str = nullptr;
ThrowOnError(GetApi().GetCUDAProviderOptionsAsString(p_, allocator, &options_str));
std::unique_ptr<void, detail::AllocatedFree> options_str_g(options_str, detail::AllocatedFree(allocator));
return std::string(options_str);
}

inline void CUDAProviderOptions::UpdateWithValue(const char* key, void* value) {
ThrowOnError(GetApi().UpdateCUDAProviderOptionsWithValue(p_, key, value));
}

inline void* CUDAProviderOptions::GetOptionByName(const char* name) const {
void* value = nullptr;
ThrowOnError(GetApi().GetCUDAProviderOptionsByName(p_, name, &value));
return value;
}

inline PrepackedWeightsContainer::PrepackedWeightsContainer() {
ThrowOnError(GetApi().CreatePrepackedWeightsContainer(&this->p_));
}

namespace detail {
template <typename T>
inline const char* KeyValuePairsImpl<T>::GetValue(const char* key) const {
@@ -1155,6 +1193,12 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AddExternalInitializersFrom
return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_CPU(int use_arena) {
ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(this->p_, use_arena));
return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options) {
ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_CUDA(this->p_, &provider_options));
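The only new inline besides the CUDA options is the PrepackedWeightsContainer constructor. A hedged sketch of its intended use follows: sharing pre-packed initializer buffers across sessions that load the same model. It assumes the existing Ort::Session overload that takes an OrtPrepackedWeightsContainer* (wrapping OrtApi::CreateSessionWithPrepackedWeightsContainer); the model path is a placeholder.

```cpp
// Sketch only: two sessions over the same model sharing one pre-packed weights container,
// so kernels that pre-pack their weights reuse the packed buffers instead of duplicating them.
#include "onnxruntime_cxx_api.h"

void CreateSessionsWithSharedPrepack(Ort::Env& env, const ORTCHAR_T* model_path /* placeholder */) {
  Ort::PrepackedWeightsContainer container;  // wraps CreatePrepackedWeightsContainer

  Ort::SessionOptions so;
  Ort::Session first{env, model_path, so, container};   // container converts to OrtPrepackedWeightsContainer*
  Ort::Session second{env, model_path, so, container};  // shares packed buffers with the first session
}
```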
13 changes: 4 additions & 9 deletions onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc
@@ -146,15 +146,10 @@ static void RunOneTest(
execution_providers.push_back(DefaultRocmExecutionProvider());
} else {
if (strict) {
const auto& api = Ort::GetApi();
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
ASSERT_TRUE(api.CreateCUDAProviderOptions(&cuda_options) == nullptr);
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(api.ReleaseCUDAProviderOptions)>
rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
std::vector<const char*> keys{"enable_skip_layer_norm_strict_mode"};
std::vector<const char*> values{"1"};
ASSERT_TRUE(api.UpdateCUDAProviderOptions(rel_cuda_options.get(), keys.data(), values.data(), 1) == nullptr);
execution_providers.push_back(CudaExecutionProviderWithOptions(std::move(rel_cuda_options.get())));
Ort::CUDAProviderOptions cuda_options;
std::unordered_map<std::string, std::string> options = {{"enable_skip_layer_norm_strict_mode", "1"}};
cuda_options.Update(options);
execution_providers.push_back(CudaExecutionProviderWithOptions(std::move(cuda_options)));
} else {
execution_providers.push_back(DefaultCudaExecutionProvider());
}
3 changes: 1 addition & 2 deletions onnxruntime/test/global_thread_pools/test_inference.cc
@@ -74,8 +74,7 @@ static Ort::Session GetSessionObj(Ort::Env& env, T model_uri, int provider_type)

if (provider_type == 1) {
#ifdef USE_CUDA
OrtCUDAProviderOptionsV2* options;
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
Ort::CUDAProviderOptions options;
session_options.AppendExecutionProvider_CUDA_V2(*options);
std::cout << "Running simple inference with cuda provider" << std::endl;
#else
46 changes: 11 additions & 35 deletions onnxruntime/test/perftest/ort_test_session.cc
@@ -179,53 +179,29 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
#endif
} else if (provider_name_ == onnxruntime::kCudaExecutionProvider) {
#ifdef USE_CUDA
const auto& api = Ort::GetApi();
OrtCUDAProviderOptionsV2* cuda_options;
Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
std::vector<const char*> option_keys, option_values;
// used to keep all option keys and value strings alive
std::list<std::string> buffer;
buffer.emplace_back("cudnn_conv_algo_search");
option_keys.push_back(buffer.back().c_str());
Ort::CUDAProviderOptions cuda_options;

const char* config_val = nullptr;
switch (performance_test_config.run_config.cudnn_conv_algo) {
case 0:
buffer.emplace_back("EXHAUSTIVE");
config_val = "EXHAUSTIVE";
break;
case 1:
buffer.emplace_back("HEURISTIC");
config_val = "HEURISTIC";
break;
default:
buffer.emplace_back("DEFAULT");
config_val = "DEFAULT";
break;
}
option_values.push_back(buffer.back().c_str());
provider_options.emplace("cudnn_conv_algo_search", config_val);
provider_options.emplace("do_copy_in_default_stream",
(!performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0"));

buffer.emplace_back("do_copy_in_default_stream");
option_keys.push_back(buffer.back().c_str());
buffer.emplace_back(!performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0");
option_values.push_back(buffer.back().c_str());

#ifdef _MSC_VER
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
#else
std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
#endif

ParseSessionConfigs(ov_string, provider_options);
for (const auto& provider_option : provider_options) {
option_keys.push_back(provider_option.first.c_str());
option_values.push_back(provider_option.second.c_str());
}
cuda_options.Update(provider_options);

Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options,
option_keys.data(), option_values.data(), option_keys.size()));
if (!status.IsOK()) {
OrtAllocator* allocator;
char* options;
Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator));
Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options));
ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(),
"\nSupported options are:\n", options);
}
session_options.AppendExecutionProvider_CUDA_V2(*cuda_options);
if (performance_test_config.run_config.enable_cuda_io_binding) {
device_memory_name_ = CUDA;
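One behavioral note on this simplification: the removed block reported the failing status together with the list of supported CUDA options, whereas Ort::CUDAProviderOptions::Update throws Ort::Exception on an invalid key. If that diagnostic is still wanted, it can be reproduced along these lines (a sketch under that assumption; the helper name is illustrative):

```cpp
// Sketch only: preserve the old "supported options" diagnostic around the throwing Update call.
#include "onnxruntime_cxx_api.h"

#include <stdexcept>
#include <string>
#include <unordered_map>

void UpdateCudaOptionsOrReport(Ort::CUDAProviderOptions& cuda_options,
                               const std::unordered_map<std::string, std::string>& provider_options) {
  try {
    cuda_options.Update(provider_options);  // throws Ort::Exception on an unrecognized key/value
  } catch (const Ort::Exception& ex) {
    // The options object is still valid, so the supported keys can be listed in the error message.
    throw std::runtime_error(std::string("[ERROR] [CUDA] Configuring the CUDA options failed: ") + ex.what() +
                             "\nSupported options are:\n" + cuda_options.GetCUDAProviderOptionsAsString());
  }
}
```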
33 changes: 13 additions & 20 deletions onnxruntime/test/providers/cpu/model_tests.cc
@@ -179,17 +179,14 @@ TEST_P(ModelTest, Run) {
ortso.SetLogId(ToUTF8String(test_case_name).c_str());
ortso.SetLogSeverityLevel(ORT_LOGGING_LEVEL_ERROR);
if (provider_name == "cuda") {
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
ASSERT_CXX_ORTSTATUS_OK(OrtApis::CreateCUDAProviderOptions(&cuda_options));
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(&OrtApis::ReleaseCUDAProviderOptions)> rel_cuda_options(
cuda_options, &OrtApis::ReleaseCUDAProviderOptions);
Ort::CUDAProviderOptions cuda_options;

std::vector<const char*> keys{"device_id", "use_tf32"};
std::vector<const char*> values;
std::string device_id = Env::Default().GetEnvironmentVar("ONNXRUNTIME_TEST_GPU_DEVICE_ID");
values.push_back(device_id.empty() ? "0" : device_id.c_str());
values.push_back("0");
ASSERT_CXX_ORTSTATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 2));

std::unordered_map<std::string, std::string> options;
options["device_id"] = (device_id.empty() ? "0" : device_id.c_str());
options["use_tf32"] = "0"; // Disable TF32 for CUDA provider
cuda_options.Update(options);

ortso.AppendExecutionProvider_CUDA_V2(*cuda_options);
} else if (provider_name == "rocm") {
@@ -208,24 +205,20 @@
#endif
else if (provider_name == "tensorrt") {
if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) {
Ort::TensorRTProviderOptions params;
ortso.AppendExecutionProvider_TensorRT_V2(*params);
OrtTensorRTProviderOptionsV2 params;
ortso.AppendExecutionProvider_TensorRT_V2(params);
} else {
Ort::TensorRTProviderOptions ep_option;
ortso.AppendExecutionProvider_TensorRT_V2(*ep_option);
}
// Enable CUDA fallback
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
ASSERT_CXX_ORTSTATUS_OK(OrtApis::CreateCUDAProviderOptions(&cuda_options));
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(&OrtApis::ReleaseCUDAProviderOptions)> rel_cuda_options(
cuda_options, &OrtApis::ReleaseCUDAProviderOptions);
Ort::CUDAProviderOptions cuda_options;

std::vector<const char*> keys{"device_id", "use_tf32"};
std::vector<const char*> values;
std::string device_id = Env::Default().GetEnvironmentVar("ONNXRUNTIME_TEST_GPU_DEVICE_ID");
values.push_back(device_id.empty() ? "0" : device_id.c_str());
values.push_back("0");
ASSERT_CXX_ORTSTATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 2));
std::unordered_map<std::string, std::string> options;
options["device_id"] = (device_id.empty() ? "0" : device_id.c_str());
options["use_tf32"] = "0"; // Disable TF32 for CUDA provider
cuda_options.Update(options);

ortso.AppendExecutionProvider_CUDA_V2(*cuda_options);
} else if (provider_name == "migraphx") {
4 changes: 2 additions & 2 deletions onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -232,7 +232,7 @@ TEST(QnnEP, TestDisableCPUFallback_ConflictingConfig) {
so.AppendExecutionProvider("QNN", options);

// Invalid! Adds CPU EP to session, but also disables CPU fallback.
Ort::Status status(OrtSessionOptionsAppendExecutionProvider_CPU(so, 1));
so.AppendExecutionProvider_CPU(1);

const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "constant_floats.onnx";

@@ -285,7 +285,7 @@ TEST_F(QnnHTPBackendTests, TestConvWithExternalData) {

so.AppendExecutionProvider("QNN", options);

Ort::Status status(OrtSessionOptionsAppendExecutionProvider_CPU(so, 1));
so.AppendExecutionProvider_CPU(1);

const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "conv_qdq_external_ini.onnx";

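For reference, the new AppendExecutionProvider_CPU wrapper in ordinary application code. Unlike the Ort::Status pattern it replaces in these tests, the wrapper throws on failure; use_arena = 1 enables the arena allocator. A minimal sketch:

```cpp
// Sketch only: append the CPU EP through the new SessionOptions wrapper,
// which forwards to OrtSessionOptionsAppendExecutionProvider_CPU and throws Ort::Exception on error.
#include "onnxruntime_cxx_api.h"

Ort::SessionOptions MakeCpuSessionOptions() {
  Ort::SessionOptions so;
  so.AppendExecutionProvider_CPU(/*use_arena=*/1);
  return so;
}
```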