diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake
index 5dcc2b2628bf4..d927489372e7c 100644
--- a/cmake/onnxruntime_common.cmake
+++ b/cmake/onnxruntime_common.cmake
@@ -14,7 +14,7 @@ set(onnxruntime_common_src_patterns
     "${ONNXRUNTIME_ROOT}/core/platform/check_intel.h"
     "${ONNXRUNTIME_ROOT}/core/platform/check_intel.cc"
     "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.h"
-    "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.cc"
+    "${ONNXRUNTIME_ROOT}/core/platform/device_discovery_common.cc"
     "${ONNXRUNTIME_ROOT}/core/platform/env.h"
     "${ONNXRUNTIME_ROOT}/core/platform/env.cc"
     "${ONNXRUNTIME_ROOT}/core/platform/env_time.h"
@@ -32,18 +32,30 @@ set(onnxruntime_common_src_patterns
 
 if(WIN32)
     list(APPEND onnxruntime_common_src_patterns
-         "${ONNXRUNTIME_ROOT}/core/platform/windows/*.h"
-         "${ONNXRUNTIME_ROOT}/core/platform/windows/*.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.h"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.h"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/env_time.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/env.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/env.h"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.h"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/stacktrace.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.h"
          "${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.h"
          "${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.cc"
     )
 
 else()
     list(APPEND onnxruntime_common_src_patterns
-         "${ONNXRUNTIME_ROOT}/core/platform/posix/*.h"
-         "${ONNXRUNTIME_ROOT}/core/platform/posix/*.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/posix/env_time.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/posix/env.cc"
+         "${ONNXRUNTIME_ROOT}/core/platform/posix/stacktrace.cc"
     )
 
+    # logging files
     if (onnxruntime_USE_SYSLOG)
         list(APPEND onnxruntime_common_src_patterns
             "${ONNXRUNTIME_ROOT}/core/platform/posix/logging/*.h"
@@ -51,7 +63,7 @@ else()
         )
     endif()
 
-    if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+    if (ANDROID)
         list(APPEND onnxruntime_common_src_patterns
             "${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.h"
             "${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.cc"
@@ -66,6 +78,21 @@ else()
     endif()
 endif()
 
+# platform-specific device discovery files
+# Exactly one file implementing DeviceDiscovery::DiscoverDevicesForPlatform() is added per platform.
+if (WIN32)
+    set(onnxruntime_device_discovery_src "windows/device_discovery.cc")
+elseif (LINUX)
+    set(onnxruntime_device_discovery_src "linux/device_discovery.cc")
+elseif (APPLE)
+    set(onnxruntime_device_discovery_src "apple/device_discovery.cc")
+else()
+    set(onnxruntime_device_discovery_src "device_discovery_default.cc")
+endif()
+list(APPEND onnxruntime_common_src_patterns
+     "${ONNXRUNTIME_ROOT}/core/platform/${onnxruntime_device_discovery_src}")
+unset(onnxruntime_device_discovery_src)
+
 if(onnxruntime_target_platform STREQUAL "ARM64EC")
     if (MSVC)
         link_directories("$ENV{VCINSTALLDIR}/Tools/MSVC/$ENV{VCToolsVersion}/lib/ARM64EC")
@@ -216,8 +243,6 @@ endif()
 
 if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64)
     # Link cpuinfo if supported
-    # Using it mainly in ARM with Android.
-    # Its functionality in detecting x86 cpu features are lacking, so is support for Windows.
     if (CPUINFO_SUPPORTED)
       onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo)
       list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME})
diff --git a/include/onnxruntime/core/common/parse_string.h b/include/onnxruntime/core/common/parse_string.h
index 6345b2a55490d..5f88d490b3415 100644
--- a/include/onnxruntime/core/common/parse_string.h
+++ b/include/onnxruntime/core/common/parse_string.h
@@ -35,13 +35,30 @@ template <typename T>
 std::enable_if_t<detail::ParseWithFromChars<T>, bool>
 TryParseStringWithClassicLocale(std::string_view str, T& value) {
   T parsed_value{};
-  const auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(), parsed_value);
 
-  if (ec != std::errc{}) {
+  std::from_chars_result conversion_result{};
+  if constexpr (std::is_integral_v<T> && std::is_unsigned_v<T>) {
+    // For unsigned integral types, also handle hex values, i.e., those beginning with "0x".
+    // std::from_chars() does not accept the "0x" prefix.
+    const bool has_hex_prefix = str.size() >= 2 &&
+                                str[0] == '0' &&
+                                (str[1] == 'x' || str[1] == 'X');
+
+    if (has_hex_prefix) {
+      str = str.substr(2);
+    }
+
+    const int base = has_hex_prefix ? 16 : 10;
+    conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value, base);
+  } else {
+    conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value);
+  }
+
+  if (conversion_result.ec != std::errc{}) {
     return false;
   }
 
-  if (ptr != str.data() + str.size()) {
+  if (conversion_result.ptr != str.data() + str.size()) {
     return false;
   }
 
diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc
index dccfdbda8971b..6c66047b4b36a 100644
--- a/onnxruntime/core/common/cpuid_info.cc
+++ b/onnxruntime/core/common/cpuid_info.cc
@@ -1,6 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 #include "core/common/cpuid_info.h"
+
+#include <iostream>
+#include <optional>
+
 #include "core/common/logging/logging.h"
 #include "core/common/logging/severity.h"
 #include "core/platform/check_intel.h"
@@ -51,6 +55,14 @@
 
 #endif  // _WIN32
 
+#if defined(__APPLE__)
+#if defined(CPUIDINFO_ARCH_ARM)
+
+#include <sys/sysctl.h>
+
+#endif  // defined(CPUIDINFO_ARCH_ARM)
+#endif  // defined(__APPLE__)
+
 #if defined(CPUINFO_SUPPORTED)
 #include <cpuinfo.h>
 #if defined(CPUIDINFO_ARCH_ARM)
@@ -74,6 +86,14 @@ void decodeMIDR(uint32_t midr, uint32_t uarch[1]);
 
 namespace onnxruntime {
 
+void CPUIDInfo::LogEarlyWarning(std::string_view message) {
+  if (!logging::LoggingManager::HasDefaultLogger()) {  // no default ORT logger yet: fall back to stderr
+    std::cerr << "onnxruntime cpuid_info warning: " << message << "\n";
+    return;
+  }
+  LOGS_DEFAULT(WARNING) << message;
+}
+
 #if defined(CPUIDINFO_ARCH_X86)
 
 static inline void GetCPUID(int function_id, int data[4]) {  // NOLINT
@@ -108,9 +128,6 @@ void CPUIDInfo::X86Init() {
   int data[4] = {-1};
   GetCPUID(0, data);
 
-  vendor_ = GetX86Vendor(data);
-  vendor_id_ = GetVendorId(vendor_);
-
   int num_IDs = data[0];
   if (num_IDs >= 1) {
     GetCPUID(1, data);
@@ -158,24 +175,8 @@ void CPUIDInfo::X86Init() {
   }
 }
 
-std::string CPUIDInfo::GetX86Vendor(int32_t* data) {
-  char vendor[sizeof(int32_t) * 3 + 1]{};
-  *reinterpret_cast<int*>(vendor + 0) = data[1];
-  *reinterpret_cast<int*>(vendor + 4) = data[3];
-  *reinterpret_cast<int*>(vendor + 8) = data[2];
-  return vendor;
-}
-
 #endif  // defined(CPUIDINFO_ARCH_X86)
 
-uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) {
-  if (vendor == "GenuineIntel") return 0x8086;
-  if (vendor == "AuthenticAMD") return 0x1022;
-  if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24);
-  if (vendor.find("NV") == 0) return 0x10DE;
-  return 0;
-}
-
 #if defined(CPUIDINFO_ARCH_ARM)
 
 #if defined(__linux__)
@@ -228,10 +229,6 @@ void CPUIDInfo::ArmLinuxInit() {
 #elif defined(_WIN32)  // ^ defined(__linux__)
 
 void CPUIDInfo::ArmWindowsInit() {
-  // Get the ARM vendor string from the registry
-  vendor_ = GetArmWindowsVendor();
-  vendor_id_ = GetVendorId(vendor_);
-
   // Read MIDR and ID_AA64ISAR1_EL1 register values from Windows registry
   // There should be one per CPU
   std::vector<uint64_t> midr_values{}, id_aa64isar1_el1_values{};
@@ -323,15 +320,6 @@ void CPUIDInfo::ArmWindowsInit() {
 #endif  // defined(CPUINFO_SUPPORTED)
 }
 
-std::string CPUIDInfo::GetArmWindowsVendor() {
-  const int MAX_VALUE_NAME = 256;
-  const CHAR vendorKey[] = "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
-  CHAR vendorVal[MAX_VALUE_NAME] = "";
-  unsigned long vendorSize = sizeof(char) * MAX_VALUE_NAME;
-  ::RegGetValueA(HKEY_LOCAL_MACHINE, vendorKey, "Vendor Identifier", RRF_RT_REG_SZ | RRF_ZEROONFAILURE, nullptr, &vendorVal, &vendorSize);
-  return vendorVal;
-}
-
 #elif defined(__APPLE__)  // ^ defined(_WIN32)
 
 void CPUIDInfo::ArmAppleInit() {
@@ -376,16 +364,21 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const {
 }
 
 CPUIDInfo::CPUIDInfo() {
-#ifdef CPUIDINFO_ARCH_X86
-  X86Init();
-#elif defined(CPUIDINFO_ARCH_ARM)
 #if defined(CPUINFO_SUPPORTED)
   pytorch_cpuinfo_init_ = cpuinfo_initialize();
   if (!pytorch_cpuinfo_init_) {
-    LOGS_DEFAULT(WARNING) << "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation "
-                             "due to undetected CPU features.";
+    LogEarlyWarning(
+        "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation due to undetected CPU "
+        "features.");
   }
 #endif  // defined(CPUINFO_SUPPORTED)
+
+  // Note: This should be run after cpuinfo initialization if cpuinfo is enabled.
+  VendorInfoInit();
+
+#ifdef CPUIDINFO_ARCH_X86
+  X86Init();
+#elif defined(CPUIDINFO_ARCH_ARM)
 #if defined(__linux__)
   ArmLinuxInit();
 #elif defined(_WIN32)
diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h
index 84571fa12e6ea..d49eca7e1d60c 100644
--- a/onnxruntime/core/common/cpuid_info.h
+++ b/onnxruntime/core/common/cpuid_info.h
@@ -103,7 +103,40 @@ class CPUIDInfo {
   }
 
  private:
+  // Log function that uses ORT logging if available or writes to stderr.
+  // This enables us to log even before ORT logging has been initialized.
+  static void LogEarlyWarning(std::string_view message);
+
   CPUIDInfo();
+
+  void VendorInfoInit();
+
+#if defined(CPUIDINFO_ARCH_X86)
+
+  void X86Init();
+
+#elif defined(CPUIDINFO_ARCH_ARM)
+
+#if defined(__linux__)
+
+  void ArmLinuxInit();
+
+#elif defined(_WIN32)
+
+  void ArmWindowsInit();
+
+#elif defined(__APPLE__)
+
+  void ArmAppleInit();
+
+#endif
+
+#endif  // defined(CPUIDINFO_ARCH_ARM)
+
+#if defined(CPUINFO_SUPPORTED)
+  bool pytorch_cpuinfo_init_{false};
+#endif  // defined(CPUINFO_SUPPORTED)
+
   bool has_amx_bf16_{false};
   bool has_avx_{false};
   bool has_avx2_{false};
@@ -132,37 +165,6 @@ class CPUIDInfo {
 
   std::string vendor_;
   uint32_t vendor_id_;
-
-  uint32_t GetVendorId(const std::string& vendor);
-
-#if defined(CPUIDINFO_ARCH_X86)
-
-  void X86Init();
-  std::string GetX86Vendor(int32_t* data);
-
-#elif defined(CPUIDINFO_ARCH_ARM)
-
-#if defined(CPUINFO_SUPPORTED)
-  // Now the following var is only used in ARM build, but later on we may expand the usage.
-  bool pytorch_cpuinfo_init_{false};
-#endif  // defined(CPUINFO_SUPPORTED)
-
-#if defined(__linux__)
-
-  void ArmLinuxInit();
-
-#elif defined(_WIN32)
-
-  void ArmWindowsInit();
-  std::string GetArmWindowsVendor();
-
-#elif defined(__APPLE__)
-
-  void ArmAppleInit();
-
-#endif
-
-#endif  // defined(CPUIDINFO_ARCH_ARM)
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/common/cpuid_info_vendor.cc b/onnxruntime/core/common/cpuid_info_vendor.cc
new file mode 100644
index 0000000000000..d4d940eedfe28
--- /dev/null
+++ b/onnxruntime/core/common/cpuid_info_vendor.cc
@@ -0,0 +1,244 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/common/cpuid_info.h"
+
+#include <array>
+#include <algorithm>
+#include <string_view>
+
+#if defined(CPUINFO_SUPPORTED)
+#include "cpuinfo.h"
+#endif
+
+namespace {
+
+#if !defined(CPUINFO_SUPPORTED)
+
+// The `cpuinfo_vendor` enum is defined by the cpuinfo library.
+// In case we don't build with cpuinfo, we define our own copy.
+// The enum was copied from here:
+// https://github.com/pytorch/cpuinfo/blob/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6/include/cpuinfo.h#L154-L307
+
+/** Vendor of processor core design */
+enum cpuinfo_vendor {
+  /** Processor vendor is not known to the library, or the library failed
+     to get vendor information from the OS. */
+  cpuinfo_vendor_unknown = 0,
+
+  /* Active vendors of modern CPUs */
+
+  /**
+   * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor
+   * microarchitectures.
+   *
+   * Sold its ARM design subsidiary in 2006. The last ARM processor design
+   * was released in 2004.
+   */
+  cpuinfo_vendor_intel = 1,
+  /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor
+     microarchitectures. */
+  cpuinfo_vendor_amd = 2,
+  /** ARM Holdings plc. Vendor of ARM and ARM64 processor
+     microarchitectures. */
+  cpuinfo_vendor_arm = 3,
+  /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor
+     microarchitectures. */
+  cpuinfo_vendor_qualcomm = 4,
+  /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */
+  cpuinfo_vendor_apple = 5,
+  /** Samsung Electronics Co., Ltd. Vendor of ARM64 processor
+     microarchitectures. */
+  cpuinfo_vendor_samsung = 6,
+  /** Nvidia Corporation. Vendor of ARM64-compatible processor
+     microarchitectures. */
+  cpuinfo_vendor_nvidia = 7,
+  /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures.
+   */
+  cpuinfo_vendor_mips = 8,
+  /** International Business Machines Corporation. Vendor of PowerPC
+     processor microarchitectures. */
+  cpuinfo_vendor_ibm = 9,
+  /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures.
+   */
+  cpuinfo_vendor_ingenic = 10,
+  /**
+   * VIA Technologies, Inc. Vendor of x86 and x86-64 processor
+   * microarchitectures.
+   *
+   * Processors are designed by Centaur Technology, a subsidiary of VIA
+   * Technologies.
+   */
+  cpuinfo_vendor_via = 11,
+  /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */
+  cpuinfo_vendor_cavium = 12,
+  /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */
+  cpuinfo_vendor_broadcom = 13,
+  /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor
+     microarchitectures. */
+  cpuinfo_vendor_apm = 14,
+  /**
+   * Huawei Technologies Co., Ltd. Vendor of ARM64 processor
+   * microarchitectures.
+   *
+   * Processors are designed by HiSilicon, a subsidiary of Huawei.
+   */
+  cpuinfo_vendor_huawei = 15,
+  /**
+   * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor
+   * of x86-64 processor microarchitectures.
+   *
+   * Processors are variants of AMD cores.
+   */
+  cpuinfo_vendor_hygon = 16,
+  /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */
+  cpuinfo_vendor_sifive = 17,
+
+  /* Active vendors of embedded CPUs */
+
+  /** Texas Instruments Inc. Vendor of ARM processor microarchitectures.
+   */
+  cpuinfo_vendor_texas_instruments = 30,
+  /** Marvell Technology Group Ltd. Vendor of ARM processor
+   * microarchitectures.
+   */
+  cpuinfo_vendor_marvell = 31,
+  /** RDC Semiconductor Co., Ltd. Vendor of x86 processor
+     microarchitectures. */
+  cpuinfo_vendor_rdc = 32,
+  /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */
+  cpuinfo_vendor_dmp = 33,
+  /** Motorola, Inc. Vendor of PowerPC and ARM processor
+     microarchitectures. */
+  cpuinfo_vendor_motorola = 34,
+
+  /* Defunct CPU vendors */
+
+  /**
+   * Transmeta Corporation. Vendor of x86 processor microarchitectures.
+   *
+   * Now defunct. The last processor design was released in 2004.
+   * Transmeta processors implemented VLIW ISA and used binary translation
+   * to execute x86 code.
+   */
+  cpuinfo_vendor_transmeta = 50,
+  /**
+   * Cyrix Corporation. Vendor of x86 processor microarchitectures.
+   *
+   * Now defunct. The last processor design was released in 1996.
+   */
+  cpuinfo_vendor_cyrix = 51,
+  /**
+   * Rise Technology. Vendor of x86 processor microarchitectures.
+   *
+   * Now defunct. The last processor design was released in 1999.
+   */
+  cpuinfo_vendor_rise = 52,
+  /**
+   * National Semiconductor. Vendor of x86 processor microarchitectures.
+   *
+   * Sold its x86 design subsidiary in 1999. The last processor design was
+   * released in 1998.
+   */
+  cpuinfo_vendor_nsc = 53,
+  /**
+   * Silicon Integrated Systems. Vendor of x86 processor
+   * microarchitectures.
+   *
+   * Sold its x86 design subsidiary in 2001. The last processor design was
+   * released in 2001.
+   */
+  cpuinfo_vendor_sis = 54,
+  /**
+   * NexGen. Vendor of x86 processor microarchitectures.
+   *
+   * Now defunct. The last processor design was released in 1994.
+   * NexGen designed the first x86 microarchitecture which decomposed x86
+   * instructions into simple microoperations.
+   */
+  cpuinfo_vendor_nexgen = 55,
+  /**
+   * United Microelectronics Corporation. Vendor of x86 processor
+   * microarchitectures.
+   *
+   * Ceased x86 in the early 1990s. The last processor design was released
+   * in 1991. Designed U5C and U5D processors. Both are 486 level.
+   */
+  cpuinfo_vendor_umc = 56,
+  /**
+   * Digital Equipment Corporation. Vendor of ARM processor
+   * microarchitecture.
+   *
+   * Sold its ARM designs in 1997. The last processor design was released
+   * in 1997.
+   */
+  cpuinfo_vendor_dec = 57,
+};
+
+#endif  // !defined(CPUINFO_SUPPORTED)
+
+}  // namespace
+
+namespace onnxruntime {
+
+namespace {
+
+struct CpuVendorInfo {
+  cpuinfo_vendor vendor;  // cpuinfo vendor enumerator; the key FindCpuVendorInfo() matches on
+  std::string_view name;  // vendor name copied into CPUIDInfo::vendor_
+  uint32_t id;            // numeric vendor id copied into CPUIDInfo::vendor_id_
+};
+
+constexpr auto kUnknownCpuVendorInfo = CpuVendorInfo{cpuinfo_vendor_unknown, "unknown", 0x0000};
+
+constexpr std::array kCpuVendorInfos{
+    CpuVendorInfo{cpuinfo_vendor_amd, "AMD", 0x1022},
+    CpuVendorInfo{cpuinfo_vendor_intel, "Intel", 0x8086},
+    CpuVendorInfo{cpuinfo_vendor_qualcomm, "Qualcomm", uint32_t{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)}},
+    CpuVendorInfo{cpuinfo_vendor_nvidia, "Nvidia", 0x10DE},
+    CpuVendorInfo{cpuinfo_vendor_apple, "Apple", 0x106B},
+    CpuVendorInfo{cpuinfo_vendor_arm, "ARM", 0x13B5},
+
+    // TODO add more as needed
+};
+
+const CpuVendorInfo* FindCpuVendorInfo(cpuinfo_vendor vendor) {
+  // Returns the kCpuVendorInfos entry describing `vendor`, or nullptr when the table has no such entry.
+  // The table only has a handful of entries, so a linear scan is all that is needed.
+  for (const CpuVendorInfo& candidate : kCpuVendorInfos) {
+    if (candidate.vendor == vendor) {
+      return &candidate;
+    }
+  }
+
+  // `vendor` is not in the table
+  return nullptr;
+}
+
+}  // namespace
+
+void CPUIDInfo::VendorInfoInit() {
+  // Ask cpuinfo (when it is built in and was initialized) for the vendor of processor 0's core.
+  // In every other case the vendor stays `cpuinfo_vendor_unknown`.
+  cpuinfo_vendor detected_vendor = cpuinfo_vendor_unknown;
+#if defined(CPUINFO_SUPPORTED)
+  if (pytorch_cpuinfo_init_) {
+    const auto* first_processor = cpuinfo_get_processor(0);
+    if (first_processor != nullptr && first_processor->core != nullptr) {
+      detected_vendor = first_processor->core->vendor;
+    }
+  }
+#endif  // defined(CPUINFO_SUPPORTED)
+
+  // A vendor without a kCpuVendorInfos entry is reported and then mapped to the "unknown" entry.
+  const CpuVendorInfo* matched_info = FindCpuVendorInfo(detected_vendor);
+  if (matched_info == nullptr) {
+    LogEarlyWarning(MakeString("Unknown CPU vendor. cpuinfo_vendor value: ", static_cast<int>(detected_vendor)));
+    matched_info = &kUnknownCpuVendorInfo;
+  }
+
+  vendor_ = matched_info->name;
+  vendor_id_ = matched_info->id;
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/common/string_utils.h b/onnxruntime/core/common/string_utils.h
index c2e26f629330f..d8d943d6e9a41 100644
--- a/onnxruntime/core/common/string_utils.h
+++ b/onnxruntime/core/common/string_utils.h
@@ -61,10 +61,11 @@ inline void TrimStringFromRight(std::string& s) {
  * @param s The string to trim.
  * @return The trimmed string.
  */
-inline std::string TrimString(std::string s) {
-  TrimStringFromRight(s);
-  TrimStringFromLeft(s);
-  return s;
+inline std::string TrimString(std::string_view s) {
+  std::string result(s);  // the trim helpers take std::string&, so work on an owned copy
+  TrimStringFromRight(result);
+  TrimStringFromLeft(result);
+  return result;
 }
 
 /**
diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc
new file mode 100644
index 0000000000000..767b834e38756
--- /dev/null
+++ b/onnxruntime/core/platform/apple/device_discovery.cc
@@ -0,0 +1,106 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/platform/device_discovery.h"
+
+#include <sys/utsname.h>
+#include <TargetConditionals.h>
+
+#include "core/common/logging/logging.h"
+
+namespace onnxruntime {
+
+namespace {
+
+constexpr auto kApplePciVendorId = 0x106B;
+constexpr auto kAppleVendorName = "Apple";
+
+std::vector<OrtHardwareDevice> GetGpuDevices() {
+  // Returns the GPU devices assumed to exist on this machine; nothing is queried at runtime.
+  //
+  // For now, we assume the existence of one GPU if it is a Mac with Apple Silicon.
+  // TODO support iOS
+  // TODO support Intel Macs which may have more than one GPU
+  std::vector<OrtHardwareDevice> gpus{};
+
+#if TARGET_OS_OSX && TARGET_CPU_ARM64
+  OrtHardwareDevice apple_gpu{};
+  apple_gpu.vendor = kAppleVendorName;
+  apple_gpu.vendor_id = kApplePciVendorId;
+  apple_gpu.type = OrtHardwareDeviceType_GPU;
+
+  gpus.push_back(std::move(apple_gpu));
+#endif  // TARGET_OS_OSX && TARGET_CPU_ARM64
+
+  return gpus;
+}
+
+bool HasAppleNeuralEngine() {
+  // Copied from onnxruntime/core/providers/coreml/builders/helper.cc:HasNeuralEngine().
+  bool has_apple_neural_engine = false;
+
+  // Zero-initialized so that `machine` is a valid empty string even when uname() fails.
+  struct utsname system_info{};
+  const bool uname_ok = uname(&system_info) == 0;
+  LOGS_DEFAULT(VERBOSE) << "Current Apple hardware info: " << (uname_ok ? system_info.machine : "<unknown>");
+
+#if TARGET_OS_IPHONE
+  // utsname.machine has device identifier. For example, identifier for iPhone Xs is "iPhone11,2".
+  // Since Neural Engine is only available for use on A12 and later, major device version in the
+  // identifier is checked for these models:
+  // A12: iPhone XS (11,2), iPad Mini - 5th Gen (11,1); A12X: iPad Pro - 3rd Gen (8,1)
+  // For more information, see https://www.theiphonewiki.com/wiki/Models
+  size_t str_len = strnlen(system_info.machine, sizeof(system_info.machine));  // bounded by the buffer itself
+  if (str_len > 4 && strncmp("iPad", system_info.machine, 4) == 0) {
+    const int major_version = atoi(system_info.machine + 4);
+    has_apple_neural_engine = major_version >= 8;  // There are no devices between iPad 8 and 11.
+  } else if (str_len > 6 && strncmp("iPhone", system_info.machine, 6) == 0) {
+    const int major_version = atoi(system_info.machine + 6);
+    has_apple_neural_engine = major_version >= 11;
+  }
+#elif TARGET_OS_OSX && TARGET_CPU_ARM64
+  // Only Mac with arm64 CPU (Apple Silicon) has ANE.
+  has_apple_neural_engine = true;
+#endif  // #if TARGET_OS_IPHONE
+
+  return has_apple_neural_engine;
+}
+
+std::vector<OrtHardwareDevice> GetNpuDevices() {
+  // Reports a single Apple NPU when HasAppleNeuralEngine() returns true, otherwise nothing.
+  if (!HasAppleNeuralEngine()) {
+    return {};
+  }
+
+  OrtHardwareDevice ane_device{};
+  ane_device.vendor = kAppleVendorName;
+  ane_device.vendor_id = kApplePciVendorId;
+  ane_device.type = OrtHardwareDeviceType_NPU;
+  std::vector<OrtHardwareDevice> npus{};
+  npus.push_back(std::move(ane_device));
+  return npus;
+}
+
+}  // namespace
+
+std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform() {
+  // Apple implementation: the CPU described by CPUIDInfo plus whatever GetGpuDevices() and
+  // GetNpuDevices() report.
+  std::unordered_set<OrtHardwareDevice> discovered;
+  discovered.insert(GetCpuDeviceFromCPUIDInfo());
+
+  // GPUs
+  const std::vector<OrtHardwareDevice> gpus = GetGpuDevices();
+  for (const auto& gpu : gpus) {
+    discovered.insert(gpu);
+  }
+
+  // NPUs
+  const std::vector<OrtHardwareDevice> npus = GetNpuDevices();
+  for (const auto& npu : npus) {
+    discovered.insert(npu);
+  }
+
+  return discovered;
+}
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/platform/device_discovery.h b/onnxruntime/core/platform/device_discovery.h
index 70be10bf09e4e..b49e63b90236a 100644
--- a/onnxruntime/core/platform/device_discovery.h
+++ b/onnxruntime/core/platform/device_discovery.h
@@ -3,25 +3,24 @@
 
 #pragma once
 
-#include <string>
 #include <unordered_set>
 
 #include "core/session/abi_devices.h"
+
 namespace onnxruntime {
 
 class DeviceDiscovery {
  public:
-  static std::unordered_set<OrtHardwareDevice>& GetDevices() {
-    // assumption: devices don't change. we assume the machine must be shutdown to change cpu/gpu/npu devices.
-    // technically someone could disable/enable a device in a running OS. we choose not to add complexity to support
-    // that scenario.
-    static std::unordered_set<OrtHardwareDevice> devices(DiscoverDevicesForPlatform());
-    return devices;
-  }
+  static const std::unordered_set<OrtHardwareDevice>& GetDevices();
 
  private:
   DeviceDiscovery() = default;
+
   // platform specific code implements this method
   static std::unordered_set<OrtHardwareDevice> DiscoverDevicesForPlatform();
+
+  // Gets a CPU device by querying `CPUIDInfo`.
+  static OrtHardwareDevice GetCpuDeviceFromCPUIDInfo();
 };
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/platform/device_discovery_common.cc b/onnxruntime/core/platform/device_discovery_common.cc
new file mode 100644
index 0000000000000..dcba31aed6fec
--- /dev/null
+++ b/onnxruntime/core/platform/device_discovery_common.cc
@@ -0,0 +1,55 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file contains platform-agnostic device discovery implementation.
+
+#include "core/platform/device_discovery.h"
+
+#include <sstream>
+
+#include "core/common/cpuid_info.h"
+#include "core/common/logging/logging.h"
+
+namespace onnxruntime {
+
+const std::unordered_set<OrtHardwareDevice>& DeviceDiscovery::GetDevices() {
+  // Discovery runs once, the first time this is called; later calls return the same cached set.
+  // assumption: devices don't change. we assume the machine must be shutdown to change cpu/gpu/npu devices.
+  // technically someone could disable/enable a device in a running OS. we choose not to add complexity to support
+  // that scenario.
+  static std::unordered_set<OrtHardwareDevice> devices = []() {
+    auto found_devices = DiscoverDevicesForPlatform();
+
+    // log discovered devices
+    for (const auto& device : found_devices) {
+      std::ostringstream description;
+      description << "Discovered OrtHardwareDevice {vendor_id:0x" << std::hex << device.vendor_id
+                  << ", device_id:0x" << device.device_id << ", vendor:" << device.vendor
+                  << ", type:" << std::dec << static_cast<int>(device.type)
+                  << ", metadata: [";
+      for (const auto& [key, value] : device.metadata.Entries()) {
+        description << key << "=" << value << ", ";
+      }
+      description << "]}";
+      LOGS_DEFAULT(INFO) << description.str();
+    }
+
+    return found_devices;
+  }();
+
+  return devices;
+}
+
+OrtHardwareDevice DeviceDiscovery::GetCpuDeviceFromCPUIDInfo() {
+  // Describes the CPU using the vendor name and vendor id reported by CPUIDInfo; device_id is always 0.
+  const auto& cpu_info = CPUIDInfo::GetCPUIDInfo();
+
+  OrtHardwareDevice device{};
+  device.type = OrtHardwareDeviceType_CPU;
+  device.device_id = 0;
+  device.vendor_id = cpu_info.GetCPUVendorId();
+  device.vendor = cpu_info.GetCPUVendor();
+  return device;
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/platform/posix/device_discovery.cc b/onnxruntime/core/platform/device_discovery_default.cc
similarity index 57%
rename from onnxruntime/core/platform/posix/device_discovery.cc
rename to onnxruntime/core/platform/device_discovery_default.cc
index 82564539ab5d4..73ddf516034ab 100644
--- a/onnxruntime/core/platform/posix/device_discovery.cc
+++ b/onnxruntime/core/platform/device_discovery_default.cc
@@ -4,14 +4,16 @@
 #include "core/platform/device_discovery.h"
 
 namespace onnxruntime {
+
 std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform() {
-  std::unordered_set<OrtHardwareDevice> devices;
-  // get CPU devices
+  // This is a default implementation.
+  // We assume that there is a CPU device and do not attempt to discover anything else.
 
-  // get GPU devices
+  std::unordered_set<OrtHardwareDevice> devices{};
 
-  // get NPU devices
+  devices.emplace(GetCpuDeviceFromCPUIDInfo());
 
   return devices;
 }
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc
new file mode 100644
index 0000000000000..6a02a1b46028f
--- /dev/null
+++ b/onnxruntime/core/platform/linux/device_discovery.cc
@@ -0,0 +1,173 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/platform/device_discovery.h"
+
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <string_view>
+
+#include "core/common/common.h"
+#include "core/common/logging/logging.h"
+#include "core/common/parse_string.h"
+#include "core/common/string_utils.h"
+
+namespace fs = std::filesystem;
+
+namespace onnxruntime {
+
+namespace {
+
+Status ErrorCodeToStatus(const std::error_code& ec) {
+  // Maps a std::error_code to a Status: OK when no error is set, otherwise FAIL with the error's details.
+  if (ec) {
+    return Status{common::StatusCategory::ONNXRUNTIME, common::StatusCode::FAIL,
+                  MakeString("Error: std::error_code with category name: ", ec.category().name(),
+                             ", value: ", ec.value(), ", message: ", ec.message())};
+  }
+  return Status::OK();
+}
+
+struct GpuSysfsPathInfo {
+  size_t card_idx;  // the number N parsed from a "cardN" directory entry name
+  fs::path path;    // path of that "cardN" entry as found under /sys/class/drm
+};
+
+Status DetectGpuSysfsPaths(std::vector<GpuSysfsPathInfo>& gpu_sysfs_paths_out) {
+  // Collects every entry of /sys/class/drm named "cardN" (N is a number) into `gpu_sysfs_paths_out`.
+  // A missing /sys/class/drm is not an error and produces an empty result. On error a non-OK Status is
+  // returned and `gpu_sysfs_paths_out` is not modified.
+  std::error_code error_code{};
+  const fs::path sysfs_class_drm_path = "/sys/class/drm";
+  const bool sysfs_class_drm_path_exists = fs::exists(sysfs_class_drm_path, error_code);
+  ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code));
+
+  if (!sysfs_class_drm_path_exists) {
+    gpu_sysfs_paths_out = std::vector<GpuSysfsPathInfo>{};
+    return Status::OK();
+  }
+
+  const auto detect_card_path = [](const fs::path& sysfs_path, size_t& card_idx) -> bool {
+    const auto filename = sysfs_path.filename();
+    const auto filename_str = std::string_view{filename.native()};
+
+    // Look for a filename matching "cardN". N is a number.
+    // compare() checks only the leading characters; find() would search the whole name for a match.
+    constexpr std::string_view prefix = "card";
+    if (filename_str.compare(0, prefix.size(), prefix) != 0) {
+      return false;
+    }
+
+    size_t parsed_card_idx{};
+    if (!TryParseStringWithClassicLocale<size_t>(filename_str.substr(prefix.size()), parsed_card_idx)) {
+      return false;
+    }
+
+    card_idx = parsed_card_idx;
+    return true;
+  };
+
+  std::vector<GpuSysfsPathInfo> gpu_sysfs_paths{};
+  // Construct and advance the iterator through the error_code overloads so that a failure while walking
+  // the directory is returned as a Status instead of being thrown as std::filesystem::filesystem_error.
+  fs::directory_iterator dir_iterator{sysfs_class_drm_path, error_code};
+  for (; !error_code && dir_iterator != fs::directory_iterator{}; dir_iterator.increment(error_code)) {
+    const auto& dir_item_path = dir_iterator->path();
+    if (size_t card_idx{}; detect_card_path(dir_item_path, card_idx)) {
+      gpu_sysfs_paths.emplace_back(GpuSysfsPathInfo{card_idx, dir_item_path});
+    }
+  }
+  ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code));
+
+  gpu_sysfs_paths_out = std::move(gpu_sysfs_paths);
+  return Status::OK();
+}
+
+Status ReadFileContents(const fs::path& file_path, std::string& contents) {
+  // Replaces `contents` with everything read from `file_path`; fails only if the file cannot be opened.
+  std::ifstream file{file_path};
+  ORT_RETURN_IF_NOT(file, "Failed to open file: ", file_path);
+  contents.assign(std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{});
+  return Status::OK();
+}
+
+template <typename ValueType>
+Status ReadValueFromFile(const fs::path& file_path, ValueType& value) {
+  // Reads the whole file, trims it with utils::TrimString, and parses the result as `ValueType`.
+  std::string raw_text{};
+  ORT_RETURN_IF_ERROR(ReadFileContents(file_path, raw_text));
+  return ParseStringWithClassicLocale<ValueType>(utils::TrimString(raw_text), value);
+}
+
+Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevice& gpu_device_out) {
+  // Fills in a GPU OrtHardwareDevice from the files below "<card path>/device".
+  // `gpu_device_out` is assigned only after both ids have been read and parsed; on any failure the
+  // error Status is returned and `gpu_device_out` is left as it was.
+
+  // both id files live in the same directory, so build that path once
+  const fs::path device_dir = path_info.path / "device";
+
+  OrtHardwareDevice device{};
+  // everything read through this path is reported as a GPU
+  device.type = OrtHardwareDeviceType_GPU;
+
+  // vendor id
+  ORT_RETURN_IF_ERROR(ReadValueFromFile(device_dir / "vendor", device.vendor_id));
+
+  // TODO vendor name
+
+  // device id
+  ORT_RETURN_IF_ERROR(ReadValueFromFile(device_dir / "device", device.device_id));
+
+  // metadata: card_idx records which "cardN" entry this device was read from
+  device.metadata.Add("card_idx", MakeString(path_info.card_idx));
+  // TODO is card discrete?
+
+  gpu_device_out = std::move(device);
+  return Status::OK();
+}
+
+Status GetGpuDevices(std::vector<OrtHardwareDevice>& gpu_devices_out) {
+  // One device per detected card. A card whose ids cannot be read is skipped with a warning, not fatal.
+  std::vector<GpuSysfsPathInfo> gpu_sysfs_path_infos{};
+  ORT_RETURN_IF_ERROR(DetectGpuSysfsPaths(gpu_sysfs_path_infos));
+  gpu_devices_out.clear();  // nothing below can fail, so the output is filled in place
+  gpu_devices_out.reserve(gpu_sysfs_path_infos.size());
+  for (const auto& gpu_sysfs_path_info : gpu_sysfs_path_infos) {
+    OrtHardwareDevice gpu_device{};
+    if (const Status status = GetGpuDeviceFromSysfs(gpu_sysfs_path_info, gpu_device); !status.IsOK()) {
+      LOGS_DEFAULT(WARNING) << "Skipping " << gpu_sysfs_path_info.path << ": " << status.ErrorMessage();
+    } else {
+      gpu_devices_out.emplace_back(std::move(gpu_device));
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform() {
+  // Linux implementation: the CPU described by CPUIDInfo plus any GPUs found through sysfs.
+  // A GPU discovery failure is logged as a warning and does not prevent the CPU from being reported.
+  std::unordered_set<OrtHardwareDevice> discovered;
+
+  // get CPU devices
+  discovered.emplace(GetCpuDeviceFromCPUIDInfo());
+
+  // get GPU devices
+  std::vector<OrtHardwareDevice> gpus{};
+  if (const Status gpu_status = GetGpuDevices(gpus); !gpu_status.IsOK()) {
+    LOGS_DEFAULT(WARNING) << "GPU device discovery failed: " << gpu_status.ErrorMessage();
+  } else {
+    for (auto& gpu : gpus) {
+      discovered.insert(std::move(gpu));
+    }
+  }
+
+  // get NPU devices
+  // TODO figure out how to discover these
+
+  return discovered;
+}
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/platform/windows/device_discovery.cc b/onnxruntime/core/platform/windows/device_discovery.cc
index ff904ddb3e7e0..cf761f587ad0b 100644
--- a/onnxruntime/core/platform/windows/device_discovery.cc
+++ b/onnxruntime/core/platform/windows/device_discovery.cc
@@ -635,19 +635,6 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
       }
     }
 
-    std::ostringstream oss;
-    oss << "Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id
-        << ", device_id:0x" << ortdevice.device_id
-        << ", vendor:" << ortdevice.vendor
-        << ", type:" << std::dec << static_cast<int>(ortdevice.type)
-        << ", metadata: [";
-    for (auto& [key, value] : ortdevice.metadata.Entries()) {
-      oss << key << "=" << value << ", ";
-    }
-
-    oss << "]}" << std::endl;
-    LOGS_DEFAULT(INFO) << oss.str();
-
     return ortdevice;
   };
 
diff --git a/onnxruntime/test/common/string_utils_test.cc b/onnxruntime/test/common/string_utils_test.cc
index 79f8ddff7b52a..983f7fa7a87f9 100644
--- a/onnxruntime/test/common/string_utils_test.cc
+++ b/onnxruntime/test/common/string_utils_test.cc
@@ -15,6 +15,8 @@ namespace test {
 namespace {
 template <typename T>
 void TestSuccessfulParse(const std::string& input, const T& expected_value) {
+  SCOPED_TRACE(MakeString("Input: \"", input, "\", expected_value: ", expected_value));
+
   T value;
   ASSERT_TRUE(TryParseStringWithClassicLocale(input, value));
   EXPECT_EQ(value, expected_value);
@@ -22,6 +24,8 @@ void TestSuccessfulParse(const std::string& input, const T& expected_value) {
 
 template <typename T>
 void TestFailedParse(const std::string& input) {
+  SCOPED_TRACE(MakeString("Input: \"", input, "\""));
+
   T value;
   EXPECT_FALSE(TryParseStringWithClassicLocale(input, value));
 }
@@ -31,6 +35,7 @@ TEST(StringUtilsTest, TryParseStringWithClassicLocale) {
   TestSuccessfulParse("-1", -1);
   TestSuccessfulParse("42", 42u);
   TestSuccessfulParse("2.5", 2.5f);
+  TestSuccessfulParse("0x100", uint32_t{0x100});
 
   // out of range
   TestFailedParse<int16_t>("32768");
diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc
new file mode 100644
index 0000000000000..21ddf9a5b1cd7
--- /dev/null
+++ b/onnxruntime/test/platform/device_discovery_test.cc
@@ -0,0 +1,33 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/platform/device_discovery.h"
+
+#include "gtest/gtest.h"
+
+namespace onnxruntime::test {
+
+namespace {
+
+// Returns copies of the devices from DeviceDiscovery::GetDevices() whose type is `device_type`.
+std::vector<OrtHardwareDevice> GetDevicesByType(OrtHardwareDeviceType device_type) {
+  std::vector<OrtHardwareDevice> result{};
+  // A plain loop avoids std::copy_if/std::back_inserter, whose headers this file does not include.
+  for (const OrtHardwareDevice& device : DeviceDiscovery::GetDevices()) {
+    if (device.type == device_type) result.push_back(device);
+  }
+  return result;
+}
+
+}  // namespace
+
+TEST(DeviceDiscoveryTest, HasCpuDevice) {  // Device discovery must report at least one CPU device.
+  const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU);
+  ASSERT_GT(cpu_devices.size(), 0);
+  // Outside of WebAssembly builds, the first CPU device must also have a nonzero vendor ID.
+#if !defined(__wasm__)
+  ASSERT_NE(cpu_devices[0].vendor_id, 0);
+#endif  // !defined(__wasm__)
+}
+
+}  // namespace onnxruntime::test
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index c8ef3e22b43f1..dd3e096c0334b 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1515,8 +1515,8 @@ def adb_push(src, dest, **kwargs):
     def adb_shell(*args, **kwargs):
         return run_subprocess([sdk_tool_paths.adb, "shell", *args], **kwargs)
 
-    def adb_install(*args, **kwargs):
-        return run_subprocess([sdk_tool_paths.adb, "install", *args], **kwargs)
+    def adb_logcat(*args, **kwargs):  # Runs `adb logcat <args>`; kwargs are passed to run_subprocess.
+        return run_subprocess([sdk_tool_paths.adb, "logcat", *args], **kwargs)
 
     def run_adb_shell(cmd):
         # GCOV_PREFIX_STRIP specifies the depth of the directory hierarchy to strip and
@@ -1542,6 +1542,17 @@ def run_adb_shell(cmd):
             )
             context_stack.callback(android.stop_emulator, emulator_proc)
 
+        all_android_tests_passed = False
+
+        def dump_logs_on_failure():  # Dumps the device log unless all_android_tests_passed is set.
+            if not all_android_tests_passed:  # looked up when this function runs, not when defined
+                log.warning("Android test failed. Dumping logs.")
+                adb_logcat("-d")  # dump logs
+
+        context_stack.callback(dump_logs_on_failure)
+
+        adb_logcat("-c")  # clear logs
+
         adb_push("testdata", device_dir, cwd=cwd)
         if is_linux() and os.path.exists("/data/onnx"):
             adb_push("/data/onnx", device_dir + "/test", cwd=cwd)
@@ -1593,6 +1604,8 @@ def run_adb_shell(cmd):
                 f"LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{device_dir} {device_dir}/onnxruntime_customopregistration_test"
             )
 
+        all_android_tests_passed = True
+
 
 def run_ios_tests(args, source_dir, config, cwd):
     is_targeting_iphone_simulator = "iphonesimulator" in args.apple_sysroot.lower()