diff --git a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
index a32f465e44adf..026fc3b2dc0a0 100644
--- a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
+++ b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
@@ -34,6 +34,7 @@ constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
 constexpr const char* kCudaGraphEnable = "enable_cuda_graph";
 constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
 constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";
+constexpr const char* kRuntimeCacheFile = "nv_runtime_cache_path";
 }  // namespace provider_option_names

 namespace run_option_names {
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 93b673f2df5bd..d76588caa64cb 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -3,6 +3,7 @@
 // Licensed under the MIT License.
 #include
 #include
+#include <filesystem>
 #include
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"
@@ -654,9 +655,9 @@ void NvExecutionProvider::PerThreadContext::ResetTensorRTContext(std::string fused_node) {
   }
 }

-bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, std::unique_ptr<nvinfer1::IExecutionContext> context) {
+bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context) {
   if (!context) {
-    context = std::make_unique<nvinfer1::IExecutionContext>();
+    context = tensorrt_ptr::unique_pointer_exec_ctx();
   }

   trt_context_map_[fused_node] = std::move(context);
@@ -757,11 +758,11 @@ bool NvExecutionProvider::PerThreadContext::IsTensorRTContextInMap(std::string fused_node) {
 nvinfer1::IExecutionContext& NvExecutionProvider::PerThreadContext::GetTensorRTContext(std::string fused_node) {
   auto it = trt_context_map_.find(fused_node);
   if (it != trt_context_map_.end()) {
-    return *(it->second);  // dereference shared pointer
+    return *(it->second.get());  // dereference shared pointer
   }
-  auto context = std::make_unique<nvinfer1::IExecutionContext>();
+  auto context = tensorrt_ptr::unique_pointer_exec_ctx();
   trt_context_map_[fused_node] = std::move(context);
-  return *(trt_context_map_[fused_node]);  // dereference shared pointer
+  return *(trt_context_map_[fused_node].get());  // dereference shared pointer
 }

 void NvExecutionProvider::ReleasePerThreadContext() const {
@@ -870,6 +871,20 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
   max_shared_mem_size_ = info.max_shared_mem_size;
   dump_subgraphs_ = info.dump_subgraphs;
   weight_stripped_engine_enable_ = info.weight_stripped_engine_enable;
+  // make runtime cache path absolute and create directory if it doesn't exist
+  if (!info.runtime_cache_path.empty()) {
+    std::filesystem::path p(info.runtime_cache_path);
+    std::filesystem::path abs_path = std::filesystem::absolute(p);
+    const auto& env = GetDefaultEnv();
+    auto status = env.CreateFolder(abs_path.string());
+    if (!status.IsOK()) {
+      LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The runtime cache directory could not be created at: " << abs_path
+                            << ". Runtime cache is disabled.";
+    } else {
+      runtime_cache_ = abs_path;
+    }
+  }
+
   onnx_model_folder_path_ = info.onnx_model_folder_path;
   onnx_model_bytestream_ = info.onnx_bytestream;
   onnx_model_bytestream_size_ = info.onnx_bytestream_size;
@@ -1053,7 +1068,8 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
                         << ", nv_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_
                         << ", nv_onnx_external_bytestream_size_: " << onnx_external_data_bytestream_size_
                         << ", nv_use_external_data_initializer_: " << use_external_data_initializer_
-                        << ", nv_op_types_to_exclude: " << op_types_to_exclude_;
+                        << ", nv_op_types_to_exclude: " << op_types_to_exclude_
+                        << ", nv_runtime_cache_path: " << runtime_cache_;
 }

 NvExecutionProvider::~NvExecutionProvider() {
@@ -1574,8 +1590,8 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
           // the initializer was marked as external data by the ORT graph at load time since it was provided in memory
           size_t size = 0;
           const void* ptr = nullptr;
-          c_api.GetTensorSizeInBytes(&initializer_value, &size);
-          c_api.GetTensorData(&initializer_value, &ptr);
+          Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
+          Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
           userWeights.emplace_back(tp->name(), ptr, size);
         } else if (utils::HasExternalDataInMemory(*tp)) {
           // only copy and take ownership of the data if none of the above conditions are met
@@ -2394,8 +2410,8 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
           // the initializer was marked as external data by the ORT graph at load time since it was provided in memory
           size_t size = 0;
           const void* ptr = nullptr;
-          c_api.GetTensorSizeInBytes(&initializer_value, &size);
-          c_api.GetTensorData(&initializer_value, &ptr);
+          Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
+          Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
           userWeights.emplace_back(tp->name(), ptr, size);
         } else if (utils::HasExternalDataInMemory(*tp)) {
           // only copy and take ownership of the data if none of the above conditions are met
@@ -2631,8 +2647,10 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
   //
   // Otherwise engine will be handled at inference time.
   std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
-  std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
+  tensorrt_ptr::unique_pointer_exec_ctx trt_context;
+  std::unique_ptr<nvinfer1::IRuntimeCache> trt_runtime_cache;
   std::unique_ptr<nvinfer1::IRuntimeConfig> trt_runtime_config;
+  std::string runtime_cache_file = "";

   // Generate file name for dumping ep context model
   if (dump_ep_context_model_ && ctx_model_path_.empty()) {
@@ -2661,6 +2679,18 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
     trt_runtime_config->setDynamicShapesKernelSpecializationStrategy(nvinfer1::DynamicShapesKernelSpecializationStrategy::kEAGER);
   }
   trt_runtime_config->setExecutionContextAllocationStrategy(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
+  if (!runtime_cache_.empty()) {
+    runtime_cache_file = (runtime_cache_ / fused_node.Name()).string();
+    trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+    auto cache_data = file_utils::ReadFile(runtime_cache_file);
+    if (!trt_runtime_cache->deserialize(cache_data.data(), cache_data.size())) {
+      trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to deserialize the runtime cache, will overwrite with new one" << std::endl;
+    }
+    if (!trt_runtime_config->setRuntimeCache(*trt_runtime_cache)) {
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to set the runtime cache" << std::endl;
+    }
+  }

   if (detailed_build_log_) {
     auto engine_build_stop = std::chrono::steady_clock::now();
@@ -2721,7 +2751,9 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
   // Build context
   // Note: Creating an execution context from an engine is thread safe per TRT doc
   // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-  trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(trt_runtime_config.get()));
+  trt_context = tensorrt_ptr::unique_pointer_exec_ctx(
+      trt_engine->createExecutionContext(trt_runtime_config.get()),
+      tensorrt_ptr::IExecutionContextDeleter(runtime_cache_file, std::move(trt_runtime_cache)));
   if (!trt_context) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                            "NvTensorRTRTX EP could not build execution context for fused node: " + fused_node.Name());
@@ -3002,7 +3034,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const GraphViewer& graph_body_viewer,
                                                                        std::unordered_map<size_t, size_t>& output_map,
                                                                        std::vector<NodeComputeInfo>& node_compute_funcs) {
   std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
-  std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
+  tensorrt_ptr::unique_pointer_exec_ctx trt_context;
   std::unordered_map<std::string, size_t> input_indexes;   // TRT engine input name -> ORT kernel context input index
   std::unordered_map<std::string, size_t> output_indexes;  // TRT engine output name -> ORT kernel context output index
   std::unordered_map<std::string, size_t> output_types;    // TRT engine output name -> ORT output tensor type
@@ -3024,11 +3056,33 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const GraphViewer& graph_body_viewer,
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage());
   }

+  std::unique_ptr<nvinfer1::IRuntimeCache> trt_runtime_cache;
+  auto trt_runtime_config = std::unique_ptr<nvinfer1::IRuntimeConfig>(trt_engine->createRuntimeConfig());
+  if (trt_runtime_config && cuda_graph_enable_) {
+    trt_runtime_config->setDynamicShapesKernelSpecializationStrategy(nvinfer1::DynamicShapesKernelSpecializationStrategy::kEAGER);
+  }
+  trt_runtime_config->setExecutionContextAllocationStrategy(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
+  std::string runtime_cache_file = "";
+  if (!runtime_cache_.empty()) {
+    runtime_cache_file = (runtime_cache_ / graph_body_viewer.GetNode(node_idx)->Name()).string();
+    trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+    auto cache_data = file_utils::ReadFile(runtime_cache_file);
+    if (!trt_runtime_cache->deserialize(cache_data.data(), cache_data.size())) {
+      trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to deserialize the runtime cache, will overwrite with new one" << std::endl;
+    }
+    if (!trt_runtime_config->setRuntimeCache(*trt_runtime_cache)) {
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to set the runtime cache" << std::endl;
+    }
+  }
+
   // Build context
   //
   // Note: Creating an execution context from an engine is thread safe per TRT doc
   // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-  trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
+  trt_context = tensorrt_ptr::unique_pointer_exec_ctx(
+      trt_engine->createExecutionContext(trt_runtime_config.get()),
+      tensorrt_ptr::IExecutionContextDeleter(runtime_cache_file, std::move(trt_runtime_cache)));
   if (!trt_context) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                            "NvTensorRTRTX EP could not build execution context for fused node: " + fused_node.Name());
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
index 9e5fd03756f02..e1a52561550e3 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
@@ -16,6 +16,7 @@ typedef void* cudnnStatus_t;
 #include
 #include "core/providers/cuda/cuda_graph.h"
 #include "nv_execution_provider_info.h"
+#include "core/providers/nv_tensorrt_rtx/nv_file_utils.h"

 namespace onnxruntime {

@@ -58,6 +59,26 @@ class TensorrtLogger : public nvinfer1::ILogger {
 };

 namespace tensorrt_ptr {
+/*
+ * custom deleter that will dump the optimized runtime cache when the execution context is destructed
+ */
+struct IExecutionContextDeleter {
+  IExecutionContextDeleter() = default;
+  IExecutionContextDeleter(const std::string& runtime_cache_path, std::unique_ptr<nvinfer1::IRuntimeCache>&& runtime_cache) : runtime_cache_path_(runtime_cache_path), runtime_cache_(std::move(runtime_cache)) {};
+  void operator()(nvinfer1::IExecutionContext* context) {
+    if (context != nullptr) {
+      if (!runtime_cache_path_.empty()) {
+        auto serialized_cache_data = std::unique_ptr<nvinfer1::IHostMemory>(runtime_cache_->serialize());
+        file_utils::WriteFile(runtime_cache_path_, serialized_cache_data->data(), serialized_cache_data->size());
+      }
+      delete context;
+    }
+  }
+
+ private:
+  std::string runtime_cache_path_;
+  std::unique_ptr<nvinfer1::IRuntimeCache> runtime_cache_;
+};

 struct TensorrtInferDeleter {
   template <typename T>
@@ -70,6 +91,7 @@ struct TensorrtInferDeleter {

 template <typename T>
 using unique_pointer = std::unique_ptr<T, TensorrtInferDeleter>;
+using unique_pointer_exec_ctx = std::unique_ptr<nvinfer1::IExecutionContext, IExecutionContextDeleter>;
 };  // namespace tensorrt_ptr

 //
@@ -196,7 +218,7 @@ struct TensorrtFuncState {
   std::string fused_node_name;
   nvinfer1::IBuilder* builder;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
+  tensorrt_ptr::unique_pointer_exec_ctx* context = nullptr;
   std::unique_ptr<nvinfer1::INetworkDefinition>* network = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
@@ -233,7 +255,7 @@ struct TensorrtShortFuncState {
   AllocatorHandle allocator = nullptr;
   std::string fused_node_name;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
+  tensorrt_ptr::unique_pointer_exec_ctx* context = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
   std::mutex* tensorrt_mu_ptr = nullptr;
@@ -356,6 +378,7 @@ class NvExecutionProvider : public IExecutionProvider {
   bool detailed_build_log_ = false;
   bool cuda_graph_enable_ = false;
   bool multi_profile_enable_ = false;
+  std::filesystem::path runtime_cache_;
   std::string cache_prefix_;
   std::string op_types_to_exclude_;
   int nv_profile_index_ = 0;
@@ -386,7 +409,7 @@ class NvExecutionProvider : public IExecutionProvider {
   // But there are still some thread safe operations, please see here https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
   // For those non thread safe operations, TRT EP uses (1) lock_guard or (2) PerThreadContext to make sure synchronization.
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::ICudaEngine>> engines_;
-  std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> contexts_;
+  std::unordered_map<std::string, tensorrt_ptr::unique_pointer_exec_ctx> contexts_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::IBuilder>> builders_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::INetworkDefinition>> networks_;
   std::unordered_map<std::string, std::vector<std::unordered_map<std::string, size_t>>> input_info_;
@@ -424,7 +447,7 @@ class NvExecutionProvider : public IExecutionProvider {

     bool IsTensorRTContextInMap(std::string fused_node);
     nvinfer1::IExecutionContext& GetTensorRTContext(std::string fused_node);
-    bool UpdateTensorRTContext(std::string fused_node, std::unique_ptr<nvinfer1::IExecutionContext> context);
+    bool UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context);
     void ResetTensorRTContext(std::string fused_node);

     // CUDA Graph management
@@ -454,7 +477,7 @@ class NvExecutionProvider : public IExecutionProvider {
     // See more details here:
     // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
     // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_execution_context.html#a63cd95430852038ce864e17c670e0b36
-    std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> trt_context_map_;
+    std::unordered_map<std::string, tensorrt_ptr::unique_pointer_exec_ctx> trt_context_map_;

     // The profile shape ranges for the engine that the execution context maintained by the PerThreadContext is built with.
     // TRT EP needs this info to determine whether to rebuild the execution context.
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
index 527a37f6c2b57..f25718114891b 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
@@ -51,6 +51,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options,
           .AddAssignmentToReference(nv::provider_option_names::kCudaGraphEnable, info.cuda_graph_enable)
           .AddAssignmentToReference(nv::provider_option_names::kUseExternalDataInitializer, info.use_external_data_initializer)
           .AddAssignmentToReference(nv::provider_option_names::kMultiProfileEnable, info.multi_profile_enable)
+          .AddAssignmentToReference(nv::provider_option_names::kRuntimeCacheFile, info.runtime_cache_path)
          .Parse(options));  // add new provider option here.

   info.user_compute_stream = user_compute_stream;
@@ -105,7 +106,8 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProviderInfo& info) {
       {nv::provider_option_names::kProfilesMaxShapes, MakeStringWithClassicLocale(info.profile_max_shapes)},
       {nv::provider_option_names::kProfilesOptShapes, MakeStringWithClassicLocale(info.profile_opt_shapes)},
       {nv::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.cuda_graph_enable)},
-      {nv::provider_option_names::kUseExternalDataInitializer, MakeStringWithClassicLocale(info.use_external_data_initializer)}};
+      {nv::provider_option_names::kUseExternalDataInitializer, MakeStringWithClassicLocale(info.use_external_data_initializer)},
+      {nv::provider_option_names::kRuntimeCacheFile, MakeStringWithClassicLocale(info.runtime_cache_path)}};
   return options;
 }
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
index b826925361b05..372e8196f38c2 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
@@ -37,7 +37,7 @@ struct NvExecutionProviderInfo {
   bool engine_decryption_enable{false};
   std::string engine_decryption_lib_path{""};
   bool force_sequential_engine_build{false};
-  std::string timing_cache_path{""};
+  std::string runtime_cache_path{""};
   bool detailed_build_log{false};
   bool sparsity_enable{false};
   int auxiliary_streams{-1};
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_file_utils.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_file_utils.h
new file mode 100644
index 0000000000000..159aba0507ffb
--- /dev/null
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_file_utils.h
@@ -0,0 +1,52 @@
+#pragma once
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <cstddef>
+#include "core/providers/shared_library/provider_api.h"
+
+namespace onnxruntime {
+namespace file_utils {
+
+inline std::vector<char> ReadFile(const std::string& path) {
+  if (!std::filesystem::exists(path)) {
+    LOGS_DEFAULT(INFO) << "TensorRT RTX could not find the file and will create a new one " << path << std::endl;
+    return {};
+  }
+  std::ifstream file(path, std::ios::in | std::ios::binary);
+  if (!file) {
+    ORT_THROW("Failed to open file: " + path);
+  }
+  file.seekg(0, std::ios::end);
+  std::streamsize size = file.tellg();
+  file.seekg(0, std::ios::beg);
+  std::vector<char> buffer(size);
+  if (size > 0 && !file.read(buffer.data(), size)) {
+    ORT_THROW("Failed to read file: " + path);
+  }
+  return buffer;
+}
+
+inline void WriteFile(const std::string& path, const void* data, size_t size) {
+  if (std::filesystem::exists(path)) {
+    std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
+    if (!file) {
+      ORT_THROW("Failed to open file for writing: " + path);
+    }
+    file.write(static_cast<const char*>(data), size);
+  } else {
+    LOGS_DEFAULT(INFO) << "TensorRT RTX a new file cache was written to " << path << std::endl;
+    // Create new file
+    std::ofstream file(path, std::ios::out | std::ios::binary);
+    if (!file) {
+      ORT_THROW("Failed to create file: " + path);
+    }
+    file.write(static_cast<const char*>(data), size);
+  }
+}
+
+inline void WriteFile(const std::string& path, const std::vector<char>& data) { WriteFile(path, data.data(), data.size()); }
+
+}  // namespace file_utils
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_options_test.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_options_test.cc
new file mode 100644
index 0000000000000..d415548876153
--- /dev/null
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_options_test.cc
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Licensed under the MIT License.
+#include "core/graph/onnx_protobuf.h"
+#include "core/session/inference_session.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/framework/test_utils.h"
+
+#include "test/util/include/scoped_env_vars.h"
+#include "test/common/trt_op_test_utils.h"
+#include "test/common/random_generator.h"
+#include "test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.h"
+
+#include <string>
+#include <filesystem>
+
+using namespace std;
+using namespace ONNX_NAMESPACE;
+using namespace ::onnxruntime::logging;
+extern std::unique_ptr<Ort::Env> ort_env;
+namespace onnxruntime {
+
+namespace test {
+size_t countFilesInDirectory(const std::string& dir_path) {
+  return std::distance(std::filesystem::directory_iterator(dir_path), std::filesystem::directory_iterator{});
+}
+
+TEST(NvExecutionProviderTest, RuntimeCaching) {
+  PathString model_name = ORT_TSTR("nv_execution_provider_runtime_caching.onnx");
+  PathString model_name_ctx = ORT_TSTR("nv_execution_provider_runtime_caching_ctx.onnx");
+  auto model_name_ctx_str = PathToUTF8(model_name_ctx);
+  clearFileIfExists(model_name_ctx);
+  std::string graph_name = "test";
+  std::vector<int> dims = {1, 3, 2};
+  std::string runtime_cache_name = "./runtime_cache/";
+  if (std::filesystem::exists(runtime_cache_name)) {
+    std::filesystem::remove_all(runtime_cache_name);
+  }
+  CreateBaseModel(model_name, graph_name, dims);
+  // AOT time
+  {
+    Ort::SessionOptions so;
+    Ort::RunOptions run_options;
+    so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1");
+    so.AddConfigEntry(kOrtSessionOptionEpContextFilePath, model_name_ctx_str.c_str());
+    so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {{"nv_runtime_cache_path", runtime_cache_name.c_str()}});
+    Ort::Session session_object(*ort_env, model_name.c_str(), so);
+
+    auto io_binding = generate_io_binding(session_object);
+    session_object.Run(run_options, io_binding);
+  }
+  // the cache will be dumped to disk upon session destruction
+  ASSERT_TRUE(std::filesystem::exists(runtime_cache_name.c_str()));
+  ASSERT_TRUE(1 == countFilesInDirectory(runtime_cache_name));
+
+  // use existing cache
+  {
+    Ort::SessionOptions so;
+    Ort::RunOptions run_options;
+    so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {{"nv_runtime_cache_path", runtime_cache_name.c_str()}});
+    Ort::Session session_object(*ort_env, model_name_ctx.c_str(), so);
+  }
+  ASSERT_TRUE(1 == countFilesInDirectory(runtime_cache_name));
+
+  // create new cache
+  {
+    Ort::SessionOptions so;
+    Ort::RunOptions run_options;
+    std::string new_cache_name = "/tmp/runtime_cache_new/";
+    if (std::filesystem::exists(new_cache_name)) {
+      std::filesystem::remove_all(new_cache_name);
+    }
+    so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {{"nv_runtime_cache_path", new_cache_name.c_str()}});
+    {
+      Ort::Session session_object(*ort_env, model_name_ctx.c_str(), so);
+    }
+    // the cache will be dumped to disk upon session destruction
+    ASSERT_TRUE(std::filesystem::exists(new_cache_name.c_str()));
+    ASSERT_TRUE(1 == countFilesInDirectory(new_cache_name));
+  }
+}
+}  // namespace test
+}  // namespace onnxruntime
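
Usage sketch (illustrative, not part of the patch): from an application's point of view, this change only requires pointing the new nv_runtime_cache_path provider option at a writable directory, mirroring what the test above does through the C++ API. The model path below is a placeholder, and the provider registration string is assumed to be the value of kNvTensorRTRTXExecutionProvider.

// Minimal standalone sketch; "model.onnx" and the cache directory are placeholders.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "nv_runtime_cache_demo");
  Ort::SessionOptions so;
  // Runtime-specialized kernels are serialized to this directory when the session is
  // destroyed and reloaded on the next run, reducing just-in-time compilation overhead.
  so.AppendExecutionProvider("NvTensorRTRTXExecutionProvider",
                             {{"nv_runtime_cache_path", "./runtime_cache/"}});
  Ort::Session session(env, ORT_TSTR("model.onnx"), so);
  return 0;
}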