diff --git a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
index a32f465e44adf..026fc3b2dc0a0 100644
--- a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
+++ b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
@@ -34,6 +34,7 @@ constexpr const char* kProfilesOptShapes = "nv_profile_opt_shapes";
 constexpr const char* kCudaGraphEnable = "enable_cuda_graph";
 constexpr const char* kMultiProfileEnable = "nv_multi_profile_enable";
 constexpr const char* kUseExternalDataInitializer = "nv_use_external_data_initializer";
+constexpr const char* kRuntimeCacheFile = "nv_runtime_cache_path";
 }  // namespace provider_option_names

 namespace run_option_names {
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 93b673f2df5bd..d76588caa64cb 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -3,6 +3,7 @@
 // Licensed under the MIT License.
 #include
 #include
+#include <filesystem>
 #include
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"
@@ -654,9 +655,9 @@ void NvExecutionProvider::PerThreadContext::ResetTensorRTContext(std::string fused_node) {
   }
 }

-bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, std::unique_ptr<nvinfer1::IExecutionContext> context) {
+bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context) {
   if (!context) {
-    context = std::make_unique<nvinfer1::IExecutionContext>();
+    context = tensorrt_ptr::unique_pointer_exec_ctx();
   }

   trt_context_map_[fused_node] = std::move(context);
@@ -757,11 +758,11 @@ bool NvExecutionProvider::PerThreadContext::IsTensorRTContextInMap(std::string fused_node) {
 nvinfer1::IExecutionContext& NvExecutionProvider::PerThreadContext::GetTensorRTContext(std::string fused_node) {
   auto it = trt_context_map_.find(fused_node);
   if (it != trt_context_map_.end()) {
-    return *(it->second);  // dereference shared pointer
+    return *(it->second.get());  // dereference shared pointer
   }
-  auto context = std::make_unique<nvinfer1::IExecutionContext>();
+  auto context = tensorrt_ptr::unique_pointer_exec_ctx();
   trt_context_map_[fused_node] = std::move(context);
-  return *(trt_context_map_[fused_node]);  // dereference shared pointer
+  return *(trt_context_map_[fused_node].get());  // dereference shared pointer
 }

 void NvExecutionProvider::ReleasePerThreadContext() const {
@@ -870,6 +871,20 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
   max_shared_mem_size_ = info.max_shared_mem_size;
   dump_subgraphs_ = info.dump_subgraphs;
   weight_stripped_engine_enable_ = info.weight_stripped_engine_enable;
+  // make runtime cache path absolute and create directory if it doesn't exist
+  if (!info.runtime_cache_path.empty()) {
+    std::filesystem::path p(info.runtime_cache_path);
+    std::filesystem::path abs_path = std::filesystem::absolute(p);
+    const auto& env = GetDefaultEnv();
+    auto status = env.CreateFolder(abs_path.string());
+    if (!status.IsOK()) {
+      LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The runtime cache directory could not be created at: " << abs_path
+                            << ". Runtime cache is disabled.";
+    } else {
+      runtime_cache_ = abs_path;
+    }
+  }
+
   onnx_model_folder_path_ = info.onnx_model_folder_path;
   onnx_model_bytestream_ = info.onnx_bytestream;
   onnx_model_bytestream_size_ = info.onnx_bytestream_size;
@@ -1053,7 +1068,8 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
                         << ", nv_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_
                         << ", nv_onnx_external_bytestream_size_: " << onnx_external_data_bytestream_size_
                         << ", nv_use_external_data_initializer_: " << use_external_data_initializer_
-                        << ", nv_op_types_to_exclude: " << op_types_to_exclude_;
+                        << ", nv_op_types_to_exclude: " << op_types_to_exclude_
+                        << ", nv_runtime_cache_path: " << runtime_cache_;
 }

 NvExecutionProvider::~NvExecutionProvider() {
@@ -1574,8 +1590,8 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
           // the initializer was marked as external data by the ORT graph at load time since it was provided in memory
           size_t size = 0;
           const void* ptr = nullptr;
-          c_api.GetTensorSizeInBytes(&initializer_value, &size);
-          c_api.GetTensorData(&initializer_value, &ptr);
+          Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
+          Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
           userWeights.emplace_back(tp->name(), ptr, size);
         } else if (utils::HasExternalDataInMemory(*tp)) {
           // only copy and take ownership of the data if none of the above conditions are met
@@ -2394,8 +2410,8 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
           // the initializer was marked as external data by the ORT graph at load time since it was provided in memory
           size_t size = 0;
           const void* ptr = nullptr;
-          c_api.GetTensorSizeInBytes(&initializer_value, &size);
-          c_api.GetTensorData(&initializer_value, &ptr);
+          Ort::ThrowOnError(c_api.GetTensorSizeInBytes(&initializer_value, &size));
+          Ort::ThrowOnError(c_api.GetTensorData(&initializer_value, &ptr));
           userWeights.emplace_back(tp->name(), ptr, size);
         } else if (utils::HasExternalDataInMemory(*tp)) {
           // only copy and take ownership of the data if none of the above conditions are met
@@ -2631,8 +2647,10 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
   //
   // Otherwise engine will be handled at inference time.
   std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
-  std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
+  tensorrt_ptr::unique_pointer_exec_ctx trt_context;
+  std::unique_ptr<nvinfer1::IRuntimeCache> trt_runtime_cache;
   std::unique_ptr<nvinfer1::IRuntimeConfig> trt_runtime_config;
+  std::string runtime_cache_file = "";

   // Generate file name for dumping ep context model
   if (dump_ep_context_model_ && ctx_model_path_.empty()) {
@@ -2661,6 +2679,18 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
     trt_runtime_config->setDynamicShapesKernelSpecializationStrategy(nvinfer1::DynamicShapesKernelSpecializationStrategy::kEAGER);
   }
   trt_runtime_config->setExecutionContextAllocationStrategy(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
+  if (!runtime_cache_.empty()) {
+    runtime_cache_file = (runtime_cache_ / fused_node.Name()).string();
+    trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+    auto cache_data = file_utils::ReadFile(runtime_cache_file);
+    if (!trt_runtime_cache->deserialize(cache_data.data(), cache_data.size())) {
+      trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to deserialize the runtime cache, will overwrite with new one" << std::endl;
+    }
+    if (!trt_runtime_config->setRuntimeCache(*trt_runtime_cache)) {
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to set the runtime cache" << std::endl;
+    }
+  }

   if (detailed_build_log_) {
     auto engine_build_stop = std::chrono::steady_clock::now();
@@ -2721,7 +2751,9 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& graph_body_viewer,
   // Build context
   // Note: Creating an execution context from an engine is thread safe per TRT doc
   // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-  trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(trt_runtime_config.get()));
+  trt_context = tensorrt_ptr::unique_pointer_exec_ctx(
+      trt_engine->createExecutionContext(trt_runtime_config.get()),
+      tensorrt_ptr::IExecutionContextDeleter(runtime_cache_file, std::move(trt_runtime_cache)));
   if (!trt_context) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                            "NvTensorRTRTX EP could not build execution context for fused node: " + fused_node.Name());
@@ -3002,7 +3034,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const GraphViewer& graph_body_viewer,
                                                                        std::unordered_map<size_t, size_t>& output_map,
                                                                        std::vector<NodeComputeInfo>& node_compute_funcs) {
   std::unique_ptr<nvinfer1::ICudaEngine> trt_engine;
-  std::unique_ptr<nvinfer1::IExecutionContext> trt_context;
+  tensorrt_ptr::unique_pointer_exec_ctx trt_context;
   std::unordered_map<std::string, size_t> input_indexes;   // TRT engine input name -> ORT kernel context input index
   std::unordered_map<std::string, size_t> output_indexes;  // TRT engine output name -> ORT kernel context output index
   std::unordered_map<std::string, size_t> output_types;    // TRT engine output name -> ORT output tensor type
@@ -3024,11 +3056,33 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const GraphViewer& graph_body_viewer,
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage());
   }

+  std::unique_ptr<nvinfer1::IRuntimeCache> trt_runtime_cache;
+  auto trt_runtime_config = std::unique_ptr<nvinfer1::IRuntimeConfig>(trt_engine->createRuntimeConfig());
+  if (trt_runtime_config && cuda_graph_enable_) {
+    trt_runtime_config->setDynamicShapesKernelSpecializationStrategy(nvinfer1::DynamicShapesKernelSpecializationStrategy::kEAGER);
+  }
+  trt_runtime_config->setExecutionContextAllocationStrategy(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
+  std::string runtime_cache_file = "";
+  if (!runtime_cache_.empty()) {
+    runtime_cache_file = (runtime_cache_ / graph_body_viewer.GetNode(node_idx)->Name()).string();
+    trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+    auto cache_data = file_utils::ReadFile(runtime_cache_file);
+    if (!trt_runtime_cache->deserialize(cache_data.data(), cache_data.size())) {
+      trt_runtime_cache = std::unique_ptr<nvinfer1::IRuntimeCache>(trt_runtime_config->createRuntimeCache());
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to deserialize the runtime cache, will overwrite with new one" << std::endl;
+    }
+    if (!trt_runtime_config->setRuntimeCache(*trt_runtime_cache)) {
+      LOGS_DEFAULT(INFO) << "TensorRT RTX failed to set the runtime cache" << std::endl;
+    }
+  }
+
   // Build context
   //
   // Note: Creating an execution context from an engine is thread safe per TRT doc
   // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-  trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
+  trt_context = tensorrt_ptr::unique_pointer_exec_ctx(
+      trt_engine->createExecutionContext(trt_runtime_config.get()),
+      tensorrt_ptr::IExecutionContextDeleter(runtime_cache_file, std::move(trt_runtime_cache)));
   if (!trt_context) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                            "NvTensorRTRTX EP could not build execution context for fused node: " + fused_node.Name());
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
index 9e5fd03756f02..e1a52561550e3 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
@@ -16,6 +16,7 @@ typedef void* cudnnStatus_t;
 #include
 #include "core/providers/cuda/cuda_graph.h"
 #include "nv_execution_provider_info.h"
+#include "core/providers/nv_tensorrt_rtx/nv_file_utils.h"

 namespace onnxruntime {

@@ -58,6 +59,26 @@ class TensorrtLogger : public nvinfer1::ILogger {
 };

 namespace tensorrt_ptr {
+/*
+ * custom deleter that will dump the optimized runtime cache when the execution context is destructed
+ */
+struct IExecutionContextDeleter {
+  IExecutionContextDeleter() = default;
+  IExecutionContextDeleter(const std::string& runtime_cache_path, std::unique_ptr<nvinfer1::IRuntimeCache>&& runtime_cache) : runtime_cache_path_(runtime_cache_path), runtime_cache_(std::move(runtime_cache)) {};
+  void operator()(nvinfer1::IExecutionContext* context) {
+    if (context != nullptr) {
+      if (!runtime_cache_path_.empty()) {
+        auto serialized_cache_data = std::unique_ptr<nvinfer1::IHostMemory>(runtime_cache_->serialize());
+        file_utils::WriteFile(runtime_cache_path_, serialized_cache_data->data(), serialized_cache_data->size());
+      }
+      delete context;
+    }
+  }
+
+ private:
+  std::string runtime_cache_path_;
+  std::unique_ptr<nvinfer1::IRuntimeCache> runtime_cache_;
+};

 struct TensorrtInferDeleter {
   template <typename T>
@@ -70,6 +91,7 @@ struct TensorrtInferDeleter {

 template <typename T>
 using unique_pointer = std::unique_ptr<T, TensorrtInferDeleter>;
+using unique_pointer_exec_ctx = std::unique_ptr<nvinfer1::IExecutionContext, IExecutionContextDeleter>;
 };  // namespace tensorrt_ptr

 //
@@ -196,7 +218,7 @@ struct TensorrtFuncState {
   std::string fused_node_name;
   nvinfer1::IBuilder* builder;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
+  tensorrt_ptr::unique_pointer_exec_ctx* context = nullptr;
   std::unique_ptr<nvinfer1::INetworkDefinition>* network = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
@@ -233,7 +255,7 @@ struct TensorrtShortFuncState {
   AllocatorHandle allocator = nullptr;
   std::string fused_node_name;
   std::unique_ptr<nvinfer1::ICudaEngine>* engine = nullptr;
-  std::unique_ptr<nvinfer1::IExecutionContext>* context = nullptr;
+  tensorrt_ptr::unique_pointer_exec_ctx* context = nullptr;
   std::vector<std::unordered_map<std::string, size_t>> input_info;
   std::vector<std::unordered_map<std::string, size_t>> output_info;
   std::mutex* tensorrt_mu_ptr = nullptr;
@@ -356,6 +378,7 @@ class NvExecutionProvider : public IExecutionProvider {
   bool detailed_build_log_ = false;
   bool cuda_graph_enable_ = false;
   bool multi_profile_enable_ = false;
+  std::filesystem::path runtime_cache_;
   std::string cache_prefix_;
   std::string op_types_to_exclude_;
   int nv_profile_index_ = 0;
@@ -386,7 +409,7 @@ class NvExecutionProvider : public IExecutionProvider {
   // But there are still some thread safe operations, please see here https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
   // For those non thread safe operations, TRT EP uses (1) lock_guard or (2) PerThreadContext to make sure synchronization.
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::ICudaEngine>> engines_;
-  std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> contexts_;
+  std::unordered_map<std::string, tensorrt_ptr::unique_pointer_exec_ctx> contexts_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::IBuilder>> builders_;
   std::unordered_map<std::string, std::unique_ptr<nvinfer1::INetworkDefinition>> networks_;
   std::unordered_map<std::string, std::vector<std::unordered_map<std::string, size_t>>> input_info_;
@@ -424,7 +447,7 @@ class NvExecutionProvider : public IExecutionProvider {

     bool IsTensorRTContextInMap(std::string fused_node);
     nvinfer1::IExecutionContext& GetTensorRTContext(std::string fused_node);
-    bool UpdateTensorRTContext(std::string fused_node, std::unique_ptr<nvinfer1::IExecutionContext> context);
+    bool UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context);
     void ResetTensorRTContext(std::string fused_node);

     // CUDA Graph management
@@ -454,7 +477,7 @@ class NvExecutionProvider : public IExecutionProvider {
     // See more details here:
     // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
     // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_execution_context.html#a63cd95430852038ce864e17c670e0b36
-    std::unordered_map<std::string, std::unique_ptr<nvinfer1::IExecutionContext>> trt_context_map_;
+    std::unordered_map<std::string, tensorrt_ptr::unique_pointer_exec_ctx> trt_context_map_;

     // The profile shape ranges for the engine that the execution context maintained by the PerThreadContext is built with.
     // TRT EP needs this info to determine whether to rebuild the execution context.
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
index 527a37f6c2b57..f25718114891b 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
@@ -51,6 +51,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options,
           .AddAssignmentToReference(nv::provider_option_names::kCudaGraphEnable, info.cuda_graph_enable)
           .AddAssignmentToReference(nv::provider_option_names::kUseExternalDataInitializer, info.use_external_data_initializer)
           .AddAssignmentToReference(nv::provider_option_names::kMultiProfileEnable, info.multi_profile_enable)
+          .AddAssignmentToReference(nv::provider_option_names::kRuntimeCacheFile, info.runtime_cache_path)
          .Parse(options));  // add new provider option here.

   info.user_compute_stream = user_compute_stream;
@@ -105,7 +106,8 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProviderInfo& info) {
       {nv::provider_option_names::kProfilesMaxShapes, MakeStringWithClassicLocale(info.profile_max_shapes)},
       {nv::provider_option_names::kProfilesOptShapes, MakeStringWithClassicLocale(info.profile_opt_shapes)},
       {nv::provider_option_names::kCudaGraphEnable, MakeStringWithClassicLocale(info.cuda_graph_enable)},
-      {nv::provider_option_names::kUseExternalDataInitializer, MakeStringWithClassicLocale(info.use_external_data_initializer)}};
+      {nv::provider_option_names::kUseExternalDataInitializer, MakeStringWithClassicLocale(info.use_external_data_initializer)},
+      {nv::provider_option_names::kRuntimeCacheFile, MakeStringWithClassicLocale(info.runtime_cache_path)}};
   return options;
 }
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
index b826925361b05..372e8196f38c2 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
@@ -37,7 +37,7 @@ struct NvExecutionProviderInfo {
   bool engine_decryption_enable{false};
   std::string engine_decryption_lib_path{""};
   bool force_sequential_engine_build{false};
-  std::string timing_cache_path{""};
+  std::string runtime_cache_path{""};
   bool detailed_build_log{false};
   bool sparsity_enable{false};
   int auxiliary_streams{-1};
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_file_utils.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_file_utils.h
new file mode 100644
index 0000000000000..159aba0507ffb
--- /dev/null
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_file_utils.h
@@ -0,0 +1,52 @@
+#pragma once
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <cstddef>
+#include "core/providers/shared_library/provider_api.h"
+
+namespace onnxruntime {
+namespace file_utils {
+
+inline std::vector<char> ReadFile(const std::string& path) {
+  if (!std::filesystem::exists(path)) {
+    LOGS_DEFAULT(INFO) << "TensorRT RTX could not find the file and will create a new one " << path << std::endl;
+    return {};
+  }
+  std::ifstream file(path, std::ios::in | std::ios::binary);
+  if (!file) {
+    ORT_THROW("Failed to open file: " + path);
+  }
+  file.seekg(0, std::ios::end);
+  std::streamsize size = file.tellg();
+  file.seekg(0, std::ios::beg);
+  std::vector<char> buffer(size);
+  if (size > 0 && !file.read(buffer.data(), size)) {
+    ORT_THROW("Failed to read file: " + path);
+  }
+  return buffer;
+}
+
+inline void WriteFile(const std::string& path, const void* data, size_t size) {
+  if (std::filesystem::exists(path)) {
+    std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
+    if (!file) {
+      ORT_THROW("Failed to open file for writing: " + path);
+    }
+    file.write(static_cast<const char*>(data), size);
+  } else {
+    LOGS_DEFAULT(INFO) << "TensorRT RTX a new file cache was written to " << path << std::endl;
+    // Create new file
+    std::ofstream file(path, std::ios::out | std::ios::binary);
+    if (!file) {
+      ORT_THROW("Failed to create file: " + path);
+    }
+    file.write(static_cast<const char*>(data), size);
+  }
+}
+
+inline void WriteFile(const std::string& path, const std::vector<char>& data) { WriteFile(path, data.data(), data.size()); }
+
+}  // namespace file_utils
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_options_test.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_options_test.cc
new file mode 100644
index 0000000000000..d415548876153
--- /dev/null
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_options_test.cc
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Licensed under the MIT License.
+#include "core/graph/onnx_protobuf.h"
+#include "core/session/inference_session.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/framework/test_utils.h"
+
+#include "test/util/include/scoped_env_vars.h"
+#include "test/common/trt_op_test_utils.h"
+#include "test/common/random_generator.h"
+#include "test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.h"
+
+#include <string>
+#include <filesystem>
+
+using namespace std;
+using namespace ONNX_NAMESPACE;
+using namespace ::onnxruntime::logging;
+extern std::unique_ptr<Ort::Env> ort_env;
+namespace onnxruntime {
+
+namespace test {
+size_t countFilesInDirectory(const std::string& dir_path) {
+  return std::distance(std::filesystem::directory_iterator(dir_path), std::filesystem::directory_iterator{});
+}
+
+TEST(NvExecutionProviderTest, RuntimeCaching) {
+  PathString model_name = ORT_TSTR("nv_execution_provider_runtime_caching.onnx");
+  PathString model_name_ctx = ORT_TSTR("nv_execution_provider_runtime_caching_ctx.onnx");
+  auto model_name_ctx_str = PathToUTF8(model_name_ctx);
+  clearFileIfExists(model_name_ctx);
+  std::string graph_name = "test";
+  std::vector<int> dims = {1, 3, 2};
+  std::string runtime_cache_name = "./runtime_cache/";
+  if (std::filesystem::exists(runtime_cache_name)) {
+    std::filesystem::remove_all(runtime_cache_name);
+  }
+  CreateBaseModel(model_name, graph_name, dims);
+  // AOT time
+  {
+    Ort::SessionOptions so;
+    Ort::RunOptions run_options;
+    so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1");
+    so.AddConfigEntry(kOrtSessionOptionEpContextFilePath, model_name_ctx_str.c_str());
+    so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {{"nv_runtime_cache_path", runtime_cache_name.c_str()}});
+    Ort::Session session_object(*ort_env, model_name.c_str(), so);
+
+    auto io_binding = generate_io_binding(session_object);
+    session_object.Run(run_options, io_binding);
+  }
+  // the cache will be dumped to disk upon session destruction
+  ASSERT_TRUE(std::filesystem::exists(runtime_cache_name.c_str()));
+  ASSERT_TRUE(1 == countFilesInDirectory(runtime_cache_name));
+
+  // use existing cache
+  {
+    Ort::SessionOptions so;
+    Ort::RunOptions run_options;
+    so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {{"nv_runtime_cache_path", runtime_cache_name.c_str()}});
+    Ort::Session session_object(*ort_env, model_name_ctx.c_str(), so);
+  }
+  ASSERT_TRUE(1 == countFilesInDirectory(runtime_cache_name));
+
+  // create new cache
+  {
+    Ort::SessionOptions so;
+    Ort::RunOptions run_options;
+    std::string new_cache_name = "/tmp/runtime_cache_new/";
+    if (std::filesystem::exists(new_cache_name)) {
+      std::filesystem::remove_all(new_cache_name);
+    }
+    so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {{"nv_runtime_cache_path", new_cache_name.c_str()}});
+    {
+      Ort::Session session_object(*ort_env, model_name_ctx.c_str(), so);
+    }
+    // the cache will be dumped to disk upon session destruction
+    ASSERT_TRUE(std::filesystem::exists(new_cache_name.c_str()));
+    ASSERT_TRUE(1 == countFilesInDirectory(new_cache_name));
+  }
+}
+}  // namespace test
+}  // namespace onnxruntime
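
Usage sketch (illustrative, not part of the patch): from an application's point of view, this change only requires pointing the new nv_runtime_cache_path provider option at a writable directory, mirroring what the test above does through the C++ API. The model path below is a placeholder, and the provider registration string is assumed to be the value of kNvTensorRTRTXExecutionProvider.

// Minimal standalone sketch; "model.onnx" and the cache directory are placeholders.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "nv_runtime_cache_demo");
  Ort::SessionOptions so;
  // Runtime-specialized kernels are serialized to this directory when the session is
  // destroyed and reloaded on the next run, reducing just-in-time compilation overhead.
  so.AppendExecutionProvider("NvTensorRTRTXExecutionProvider",
                             {{"nv_runtime_cache_path", "./runtime_cache/"}});
  Ort::Session session(env, ORT_TSTR("model.onnx"), so);
  return 0;
}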