From f5b4d8a1e682b5e8ebc7b94df7b4dc4e38d2083e Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 11 Jun 2025 11:26:42 -0700 Subject: [PATCH 01/39] add apple device_discovery.cc --- cmake/onnxruntime_common.cmake | 20 +++++++++++++++---- .../core/platform/apple/device_discovery.cc | 18 +++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 onnxruntime/core/platform/apple/device_discovery.cc diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 1e26eede8a66f..013ad025007b7 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -14,7 +14,6 @@ set(onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/check_intel.h" "${ONNXRUNTIME_ROOT}/core/platform/check_intel.cc" "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.h" - "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.cc" "${ONNXRUNTIME_ROOT}/core/platform/env.h" "${ONNXRUNTIME_ROOT}/core/platform/env.cc" "${ONNXRUNTIME_ROOT}/core/platform/env_time.h" @@ -40,10 +39,23 @@ if(WIN32) else() list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/posix/*.h" - "${ONNXRUNTIME_ROOT}/core/platform/posix/*.cc" + "${ONNXRUNTIME_ROOT}/core/platform/posix/device_discovery.cc" + "${ONNXRUNTIME_ROOT}/core/platform/posix/env_time.cc" + "${ONNXRUNTIME_ROOT}/core/platform/posix/env.cc" + "${ONNXRUNTIME_ROOT}/core/platform/posix/stacktrace.cc" ) + # device discovery files + if (LINUX OR ANDROID) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/posix/device_discovery.cc") + elseif(APPLE) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/apple/device_discovery.cc") + endif() + + + # logging files if (onnxruntime_USE_SYSLOG) list(APPEND onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/posix/logging/*.h" @@ -51,7 +63,7 @@ else() ) endif() - if (CMAKE_SYSTEM_NAME STREQUAL "Android") + 
if (ANDROID) list(APPEND onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.h" "${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.cc" diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc new file mode 100644 index 0000000000000..905306ef67b6f --- /dev/null +++ b/onnxruntime/core/platform/apple/device_discovery.cc @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/platform/device_discovery.h" + +namespace onnxruntime { +std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { + std::unordered_set devices; + + // get CPU devices + + // get GPU devices + + // get NPU devices + + return devices; +} +} // namespace onnxruntime From f58ea4dc8ee00614a6c268b5cecc53ed3c8c8304 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:07:41 -0700 Subject: [PATCH 02/39] log discovered devices across platforms --- cmake/onnxruntime_common.cmake | 1 + onnxruntime/core/platform/device_discovery.cc | 40 +++++++++++++++++++ onnxruntime/core/platform/device_discovery.h | 11 ++--- .../core/platform/windows/device_discovery.cc | 13 ------ 4 files changed, 44 insertions(+), 21 deletions(-) create mode 100644 onnxruntime/core/platform/device_discovery.cc diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 013ad025007b7..f5503181ff1e8 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -14,6 +14,7 @@ set(onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/check_intel.h" "${ONNXRUNTIME_ROOT}/core/platform/check_intel.cc" "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.h" + "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.cc" "${ONNXRUNTIME_ROOT}/core/platform/env.h" "${ONNXRUNTIME_ROOT}/core/platform/env.cc" "${ONNXRUNTIME_ROOT}/core/platform/env_time.h" diff 
--git a/onnxruntime/core/platform/device_discovery.cc b/onnxruntime/core/platform/device_discovery.cc new file mode 100644 index 0000000000000..9941ebbcadbfc --- /dev/null +++ b/onnxruntime/core/platform/device_discovery.cc @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/platform/device_discovery.h" + +#include + +#include "core/common/logging/logging.h" + +namespace onnxruntime { + +const std::unordered_set& DeviceDiscovery::GetDevices() { + // assumption: devices don't change. we assume the machine must be shutdown to change cpu/gpu/npu devices. + // technically someone could disable/enable a device in a running OS. we choose not to add complexity to support + // that scenario. + static std::unordered_set devices = []() { + auto discovered_devices = DiscoverDevicesForPlatform(); + + // log discovered devices + for (const auto& ortdevice : discovered_devices) { + std::ostringstream oss; + oss << "Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id + << ", device_id:0x" << ortdevice.device_id + << ", vendor:" << ortdevice.vendor + << ", type:" << std::dec << static_cast(ortdevice.type) + << ", metadata: ["; + for (auto& [key, value] : ortdevice.metadata.entries) { + oss << key << "=" << value << ", "; + } + oss << "]}"; + LOGS_DEFAULT(INFO) << oss.str(); + } + + return discovered_devices; + }(); + + return devices; +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/device_discovery.h b/onnxruntime/core/platform/device_discovery.h index 70be10bf09e4e..1c32c7a29de14 100644 --- a/onnxruntime/core/platform/device_discovery.h +++ b/onnxruntime/core/platform/device_discovery.h @@ -3,24 +3,19 @@ #pragma once -#include #include #include "core/session/abi_devices.h" + namespace onnxruntime { class DeviceDiscovery { public: - static std::unordered_set& GetDevices() { - // assumption: devices don't change. 
we assume the machine must be shutdown to change cpu/gpu/npu devices. - // technically someone could disable/enable a device in a running OS. we choose not to add complexity to support - // that scenario. - static std::unordered_set devices(DiscoverDevicesForPlatform()); - return devices; - } + static const std::unordered_set& GetDevices(); private: DeviceDiscovery() = default; + // platform specific code implements this method static std::unordered_set DiscoverDevicesForPlatform(); }; diff --git a/onnxruntime/core/platform/windows/device_discovery.cc b/onnxruntime/core/platform/windows/device_discovery.cc index fdd4fa5b815d6..46d2fb1cad290 100644 --- a/onnxruntime/core/platform/windows/device_discovery.cc +++ b/onnxruntime/core/platform/windows/device_discovery.cc @@ -574,19 +574,6 @@ std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor } } - std::ostringstream oss; - oss << "Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id - << ", device_id:0x" << ortdevice.device_id - << ", vendor:" << ortdevice.vendor - << ", type:" << std::dec << static_cast(ortdevice.type) - << ", metadata: ["; - for (auto& [key, value] : ortdevice.metadata.entries) { - oss << key << "=" << value << ", "; - } - - oss << "]}" << std::endl; - LOGS_DEFAULT(INFO) << oss.str(); - return ortdevice; }; From d7b598dd85342d8d38f6fa45a7aeb7a7cc6fef68 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 13 Jun 2025 12:53:49 -0700 Subject: [PATCH 03/39] save work - linux CPU discovery --- cmake/onnxruntime_common.cmake | 41 ++++++++--- onnxruntime/core/common/cpuid_info.cc | 21 +++++- onnxruntime/core/common/cpuid_info.h | 1 + onnxruntime/core/common/string_utils.h | 9 ++- onnxruntime/core/platform/device_discovery.cc | 2 +- .../core/platform/device_discovery_default.cc | 2 + onnxruntime/core/platform/linux/cpuinfo.cc | 73 +++++++++++++++++++ onnxruntime/core/platform/linux/cpuinfo.h | 29 ++++++++ 
.../core/platform/linux/device_discovery.cc | 37 ++++++++++ .../core/platform/posix/device_discovery.cc | 17 ----- .../test/platform/device_discovery_test.cc | 29 ++++++++ 11 files changed, 228 insertions(+), 33 deletions(-) create mode 100644 onnxruntime/core/platform/device_discovery_default.cc create mode 100644 onnxruntime/core/platform/linux/cpuinfo.cc create mode 100644 onnxruntime/core/platform/linux/cpuinfo.h create mode 100644 onnxruntime/core/platform/linux/device_discovery.cc delete mode 100644 onnxruntime/core/platform/posix/device_discovery.cc create mode 100644 onnxruntime/test/platform/device_discovery_test.cc diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index f5503181ff1e8..4de0ea5cfd18a 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -32,30 +32,36 @@ set(onnxruntime_common_src_patterns if(WIN32) list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/windows/*.h" - "${ONNXRUNTIME_ROOT}/core/platform/windows/*.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/env_time.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/env.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/env.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/stacktrace.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.h" "${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.h" "${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.cc" ) else() list(APPEND onnxruntime_common_src_patterns - 
"${ONNXRUNTIME_ROOT}/core/platform/posix/device_discovery.cc" "${ONNXRUNTIME_ROOT}/core/platform/posix/env_time.cc" "${ONNXRUNTIME_ROOT}/core/platform/posix/env.cc" "${ONNXRUNTIME_ROOT}/core/platform/posix/stacktrace.cc" ) - # device discovery files - if (LINUX OR ANDROID) + if(LINUX) list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/posix/device_discovery.cc") - elseif(APPLE) - list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/apple/device_discovery.cc") + "${ONNXRUNTIME_ROOT}/core/platform/linux/cpuinfo.h" + "${ONNXRUNTIME_ROOT}/core/platform/linux/cpuinfo.cc" + ) endif() - # logging files if (onnxruntime_USE_SYSLOG) list(APPEND onnxruntime_common_src_patterns @@ -79,6 +85,21 @@ else() endif() endif() +# platform-specific device discovery files +if (WIN32) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/windows/device_discovery.cc") +elseif (LINUX OR ANDROID) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/linux/device_discovery.cc") +elseif (APPLE) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/apple/device_discovery.cc") +else() + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/device_discovery_default.cc") +endif() + if(onnxruntime_target_platform STREQUAL "ARM64EC") if (MSVC) link_directories("$ENV{VCINSTALLDIR}/Tools/MSVC/$ENV{VCToolsVersion}/lib/ARM64EC") diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 8ea593f107833..d876a88446cd1 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -39,6 +39,8 @@ #endif // ARM +#include "core/platform/linux/cpuinfo.h" + #endif // Linux #if _WIN32 @@ -170,7 +172,7 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) { uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { if (vendor == "GenuineIntel") return 0x8086; - if 
(vendor == "GenuineAMD") return 0x1022; + if (vendor == "AuthenticAMD") return 0x1022; if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24); if (vendor.find("NV") == 0) return 0x10DE; return 0; @@ -224,6 +226,23 @@ void CPUIDInfo::ArmLinuxInit() { } } +std::string CPUIDInfo::GetArmLinuxVendor() { + std::string vendor{}; + + CpuInfo cpu_info{}; + Status parse_status = ParseCpuInfoFile(cpu_info); + if (!parse_status.IsOK()) { + LOGS_DEFAULT(WARNING) << "Failed to parse /proc/cpuinfo file. Error: " << parse_status; + } + + if (cpu_info.size() > 0) { + // just use the vendor from the first processor's information + vendor = cpu_info[0].vendor_id; + } + + return vendor; +} + #elif defined(_WIN32) // ^ defined(__linux__) void CPUIDInfo::ArmWindowsInit() { diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index 9c67ebbffa260..cef50c159a718 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -148,6 +148,7 @@ class CPUIDInfo { #if defined(__linux__) void ArmLinuxInit(); + std::string GetArmLinuxVendor(); #elif defined(_WIN32) diff --git a/onnxruntime/core/common/string_utils.h b/onnxruntime/core/common/string_utils.h index c2e26f629330f..d8d943d6e9a41 100644 --- a/onnxruntime/core/common/string_utils.h +++ b/onnxruntime/core/common/string_utils.h @@ -61,10 +61,11 @@ inline void TrimStringFromRight(std::string& s) { * @param s The string to trim. * @return The trimmed string. 
*/ -inline std::string TrimString(std::string s) { - TrimStringFromRight(s); - TrimStringFromLeft(s); - return s; +inline std::string TrimString(std::string_view s) { + std::string s_trimmed{s}; + TrimStringFromRight(s_trimmed); + TrimStringFromLeft(s_trimmed); + return s_trimmed; } /** diff --git a/onnxruntime/core/platform/device_discovery.cc b/onnxruntime/core/platform/device_discovery.cc index 9941ebbcadbfc..decc25cc4f089 100644 --- a/onnxruntime/core/platform/device_discovery.cc +++ b/onnxruntime/core/platform/device_discovery.cc @@ -19,7 +19,7 @@ const std::unordered_set& DeviceDiscovery::GetDevices() { // log discovered devices for (const auto& ortdevice : discovered_devices) { std::ostringstream oss; - oss << "Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id + oss << "Discovered OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id << ", device_id:0x" << ortdevice.device_id << ", vendor:" << ortdevice.vendor << ", type:" << std::dec << static_cast(ortdevice.type) diff --git a/onnxruntime/core/platform/device_discovery_default.cc b/onnxruntime/core/platform/device_discovery_default.cc new file mode 100644 index 0000000000000..d194bed477a31 --- /dev/null +++ b/onnxruntime/core/platform/device_discovery_default.cc @@ -0,0 +1,2 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. diff --git a/onnxruntime/core/platform/linux/cpuinfo.cc b/onnxruntime/core/platform/linux/cpuinfo.cc new file mode 100644 index 0000000000000..51fa3282534ba --- /dev/null +++ b/onnxruntime/core/platform/linux/cpuinfo.cc @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/platform/linux/cpuinfo.h" + +#include +#include +#include + +#include "core/common/string_utils.h" +#include "core/common/parse_string.h" + +namespace onnxruntime { + +namespace { +using KeyValuePairs = std::map>; + +Status GetValue(const KeyValuePairs& key_value_pairs, std::string_view key, + std::string_view& value) { + auto it = key_value_pairs.find(key); + ORT_RETURN_IF(it == key_value_pairs.end(), "Failed to find key: ", key); + value = it->second; + return Status::OK(); +} +} // namespace + +Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) { + std::ifstream in{cpu_info_file}; + + ORT_RETURN_IF_NOT(in, "Failed to open file: ", cpu_info_file); + + CpuInfo cpu_info{}; + KeyValuePairs key_value_pairs{}; + + auto add_processor_info = [&]() -> Status { + if (!key_value_pairs.empty()) { + std::string_view value{}; + CpuInfoFileProcessorInfo processor_info{}; + + ORT_RETURN_IF_ERROR(GetValue(key_value_pairs, "processor", value)); + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value, processor_info.processor)); + + ORT_RETURN_IF_ERROR(GetValue(key_value_pairs, "vendor", value)); + processor_info.vendor_id = std::string{value}; + + cpu_info.emplace_back(std::move(processor_info)); + + key_value_pairs.clear(); + } + return Status::OK(); + }; + + for (std::string line{}; std::getline(in, line);) { + line = utils::TrimString(line); + + if (line.empty()) { + ORT_RETURN_IF_ERROR(add_processor_info()); + continue; + } + + auto parts = utils::SplitString(line, ":"); + ORT_RETURN_IF_NOT(parts.size() == 2, "Unexpected format. 
Line: '", line, "'"); + + key_value_pairs.emplace(utils::TrimString(parts[0]), utils::TrimString(parts[1])); + } + + ORT_RETURN_IF_ERROR(add_processor_info()); + + cpu_info_out = std::move(cpu_info); + return Status::OK(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/linux/cpuinfo.h b/onnxruntime/core/platform/linux/cpuinfo.h new file mode 100644 index 0000000000000..825ca8c4e4e6b --- /dev/null +++ b/onnxruntime/core/platform/linux/cpuinfo.h @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include + +#include "core/common/status.h" + +#include + +namespace onnxruntime { + +struct CpuInfoFileProcessorInfo { + size_t processor; + std::string vendor_id; + + // There are plenty of other fields. We can add more if needed. +}; + +using CpuInfo = std::vector; + +Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info); + +inline Status ParseCpuInfoFile(CpuInfo& cpu_info) { + return ParseCpuInfoFile("/proc/cpuinfo", cpu_info); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc new file mode 100644 index 0000000000000..1db80c4d16c26 --- /dev/null +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/platform/device_discovery.h" + +#include "core/common/cpuid_info.h" + +namespace onnxruntime { + +namespace { +OrtHardwareDevice GetCpuDevice() { + const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); + + OrtHardwareDevice cpu_device{}; + cpu_device.vendor = cpuid_info.GetCPUVendor(); + cpu_device.vendor_id = cpuid_info.GetCPUVendorId(); + cpu_device.device_id = 0; + cpu_device.type = OrtHardwareDeviceType_CPU; + + return cpu_device; +} + +} // namespace + +std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { + std::unordered_set devices; + + // get CPU devices + devices.emplace(GetCpuDevice()); + + // get GPU devices + + // get NPU devices + + return devices; +} +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/posix/device_discovery.cc b/onnxruntime/core/platform/posix/device_discovery.cc deleted file mode 100644 index 82564539ab5d4..0000000000000 --- a/onnxruntime/core/platform/posix/device_discovery.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "core/platform/device_discovery.h" - -namespace onnxruntime { -std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { - std::unordered_set devices; - // get CPU devices - - // get GPU devices - - // get NPU devices - - return devices; -} -} // namespace onnxruntime diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc new file mode 100644 index 0000000000000..7e1c43c387ad8 --- /dev/null +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/platform/device_discovery.h" + +#include "gtest/gtest.h" + +namespace onnxruntime::test { + +namespace { + +std::vector GetDevicesByType(OrtHardwareDeviceType device_type) { + std::vector result{}; + const auto& devices = DeviceDiscovery::GetDevices(); + std::copy_if(devices.begin(), devices.end(), std::back_inserter(result), + [device_type](const OrtHardwareDevice& device) { + return device.type == device_type; + }); + return result; +} + +} // namespace + +TEST(DeviceDiscoveryTest, HasCpuDevice) { + const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); + ASSERT_GT(cpu_devices.size(), 0); +} + +} // namespace onnxruntime::test From 6101b9994f9827c6fc3d3350e4bd96b2e78c8de1 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 17 Jun 2025 17:45:36 -0700 Subject: [PATCH 04/39] support parsing hex string to int --- .../onnxruntime/core/common/parse_string.h | 23 ++++++++++++++++--- onnxruntime/test/common/string_utils_test.cc | 6 +++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/onnxruntime/core/common/parse_string.h b/include/onnxruntime/core/common/parse_string.h index 6345b2a55490d..6ddf14cf5860d 100644 --- a/include/onnxruntime/core/common/parse_string.h +++ b/include/onnxruntime/core/common/parse_string.h @@ -35,13 +35,30 @@ template std::enable_if_t, bool> TryParseStringWithClassicLocale(std::string_view str, T& value) { T parsed_value{}; - const auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(), parsed_value); - if (ec != std::errc{}) { + std::from_chars_result conversion_result{}; + if constexpr (std::is_integral_v && std::is_unsigned_v) { + // For unsigned integral types, also handle hex values, i.e., those beginning with "0x". + // TODO We could also extend this to other types. For that, we would need to handle negative values. 
+ const bool has_hex_prefix = str.size() >= 2 && + str[0] == '0' && + (str[1] == 'x' || str[1] == 'X'); + + if (has_hex_prefix) { + str = str.substr(2); + } + + const int base = has_hex_prefix ? 16 : 10; + conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value, base); + } else { + conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value); + } + + if (conversion_result.ec != std::errc{}) { return false; } - if (ptr != str.data() + str.size()) { + if (conversion_result.ptr != str.data() + str.size()) { return false; } diff --git a/onnxruntime/test/common/string_utils_test.cc b/onnxruntime/test/common/string_utils_test.cc index 79f8ddff7b52a..62925d68cb604 100644 --- a/onnxruntime/test/common/string_utils_test.cc +++ b/onnxruntime/test/common/string_utils_test.cc @@ -15,6 +15,8 @@ namespace test { namespace { template void TestSuccessfulParse(const std::string& input, const T& expected_value) { + SCOPED_TRACE(MakeString("Input: \"", input, "\", expected_value: ", expected_value)); + T value; ASSERT_TRUE(TryParseStringWithClassicLocale(input, value)); EXPECT_EQ(value, expected_value); @@ -22,6 +24,8 @@ void TestSuccessfulParse(const std::string& input, const T& expected_value) { template void TestFailedParse(const std::string& input) { + SCOPED_TRACE(MakeString("Input: \"", input, "\"")); + T value; EXPECT_FALSE(TryParseStringWithClassicLocale(input, value)); } @@ -31,6 +35,8 @@ TEST(StringUtilsTest, TryParseStringWithClassicLocale) { TestSuccessfulParse("-1", -1); TestSuccessfulParse("42", 42u); TestSuccessfulParse("2.5", 2.5f); + // TestSuccessfulParse("0x2.8", 2.5f); // we don't handle floating point hex yet + TestSuccessfulParse("0x100", uint32_t{0x100}); // out of range TestFailedParse("32768"); From a82a10c36be82f08ca9fa576a8dd6f0ee4e5f58f Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 17 Jun 2025 17:45:56 -0700 Subject: [PATCH 05/39] save work - linux GPU 
impl --- .../core/platform/linux/device_discovery.cc | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 1db80c4d16c26..f3f98cdf9eea1 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -3,7 +3,18 @@ #include "core/platform/device_discovery.h" +#include +#include +#include +#include + +#include "core/common/common.h" #include "core/common/cpuid_info.h" +#include "core/common/narrow.h" +#include "core/common/parse_string.h" +#include "core/common/string_utils.h" + +namespace fs = std::filesystem; namespace onnxruntime { @@ -20,6 +31,72 @@ OrtHardwareDevice GetCpuDevice() { return cpu_device; } +bool ParseGpuSysfsPath(const fs::path& sysfs_path, size_t& idx) { + const auto filename = sysfs_path.filename(); + const auto filename_str = std::string_view{filename.native()}; + + // Look for a filename matching "cardN". N is a number. 
+ constexpr std::string_view prefix = "card"; + if (filename_str.find(prefix) != 0) { + return false; + } + + size_t parsed_idx{}; + if (!TryParseStringWithClassicLocale(filename_str.substr(prefix.size()), parsed_idx)) { + return false; + } + + idx = parsed_idx; + return true; +} + +std::string ReadFileContents(const fs::path& file_path) { + std::ifstream file{file_path}; + ORT_ENFORCE(file, "Failed to open file: ", file_path); + std::istreambuf_iterator file_begin{file}, file_end{}; + std::string contents(file_begin, file_end); + return contents; +} + +OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path, size_t idx) { + OrtHardwareDevice gpu_device{}; + + // vendor id + { + const auto vendor_file_path = sysfs_path / "device" / "vendor"; + const auto vendor_id_text = utils::TrimString(ReadFileContents(vendor_file_path)); + gpu_device.vendor_id = ParseStringWithClassicLocale(vendor_id_text); + } + + // TODO metadata["Discrete"] + + gpu_device.device_id = narrow(idx); + gpu_device.type = OrtHardwareDeviceType_GPU; + + return gpu_device; +} + +std::vector GetGpuDevices() { + std::vector gpu_devices{}; + + const auto sysfs_class_drm_path = "/sys/class/drm"; + + if (!fs::exists(sysfs_class_drm_path)) { + return gpu_devices; + } + + for (const auto& dir_item : fs::directory_iterator{sysfs_class_drm_path}) { + const auto& dir_item_path = dir_item.path(); + + if (size_t idx{}; ParseGpuSysfsPath(dir_item_path, idx)) { + auto gpu_device = GetGpuDevice(dir_item_path, idx); + gpu_devices.emplace_back(std::move(gpu_device)); + } + } + + return gpu_devices; +} + } // namespace std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { @@ -29,6 +106,11 @@ std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor devices.emplace(GetCpuDevice()); // get GPU devices + { + auto gpu_devices = GetGpuDevices(); + devices.insert(std::make_move_iterator(gpu_devices.begin()), + std::make_move_iterator(gpu_devices.end())); + } // get NPU devices From 
64e0a82b11a34412d05746bd29859d41c92ecd88 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 17 Jun 2025 20:11:06 -0700 Subject: [PATCH 06/39] fix Linux GPU device_id --- .../core/platform/linux/device_discovery.cc | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index f3f98cdf9eea1..14996b730c2e8 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -10,7 +10,6 @@ #include "core/common/common.h" #include "core/common/cpuid_info.h" -#include "core/common/narrow.h" #include "core/common/parse_string.h" #include "core/common/string_utils.h" @@ -58,19 +57,25 @@ std::string ReadFileContents(const fs::path& file_path) { return contents; } -OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path, size_t idx) { +OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { OrtHardwareDevice gpu_device{}; // vendor id { - const auto vendor_file_path = sysfs_path / "device" / "vendor"; - const auto vendor_id_text = utils::TrimString(ReadFileContents(vendor_file_path)); + const auto vendor_id_path = sysfs_path / "device" / "vendor"; + const auto vendor_id_text = utils::TrimString(ReadFileContents(vendor_id_path)); gpu_device.vendor_id = ParseStringWithClassicLocale(vendor_id_text); } + // device id + { + const auto device_id_path = sysfs_path / "device" / "device"; + const auto device_id_text = utils::TrimString(ReadFileContents(device_id_path)); + gpu_device.device_id = ParseStringWithClassicLocale(device_id_text); + } + // TODO metadata["Discrete"] - gpu_device.device_id = narrow(idx); gpu_device.type = OrtHardwareDeviceType_GPU; return gpu_device; @@ -79,7 +84,7 @@ OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path, size_t idx) { std::vector GetGpuDevices() { std::vector gpu_devices{}; - const auto 
sysfs_class_drm_path = "/sys/class/drm"; + const fs::path sysfs_class_drm_path = "/sys/class/drm"; if (!fs::exists(sysfs_class_drm_path)) { return gpu_devices; @@ -89,7 +94,7 @@ std::vector GetGpuDevices() { const auto& dir_item_path = dir_item.path(); if (size_t idx{}; ParseGpuSysfsPath(dir_item_path, idx)) { - auto gpu_device = GetGpuDevice(dir_item_path, idx); + auto gpu_device = GetGpuDevice(dir_item_path); gpu_devices.emplace_back(std::move(gpu_device)); } } From 3bba3255f30fa105aee28cf4f5ab34c7c0fc8157 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 17 Jun 2025 20:17:06 -0700 Subject: [PATCH 07/39] Rename onnxruntime/core/platform/device_discovery.cc to device_discovery_common.cc --- .../platform/{device_discovery.cc => device_discovery_common.cc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onnxruntime/core/platform/{device_discovery.cc => device_discovery_common.cc} (100%) diff --git a/onnxruntime/core/platform/device_discovery.cc b/onnxruntime/core/platform/device_discovery_common.cc similarity index 100% rename from onnxruntime/core/platform/device_discovery.cc rename to onnxruntime/core/platform/device_discovery_common.cc From 0594d5a8e1a209addd5d9a65c82997bfcb36628e Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 18 Jun 2025 11:21:29 -0700 Subject: [PATCH 08/39] finish renaming --- cmake/onnxruntime_common.cmake | 2 +- onnxruntime/core/platform/device_discovery_common.cc | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 4de0ea5cfd18a..ce5af64f8d0d9 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -14,7 +14,7 @@ set(onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/check_intel.h" "${ONNXRUNTIME_ROOT}/core/platform/check_intel.cc" "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.h" - 
"${ONNXRUNTIME_ROOT}/core/platform/device_discovery.cc" + "${ONNXRUNTIME_ROOT}/core/platform/device_discovery_common.cc" "${ONNXRUNTIME_ROOT}/core/platform/env.h" "${ONNXRUNTIME_ROOT}/core/platform/env.cc" "${ONNXRUNTIME_ROOT}/core/platform/env_time.h" diff --git a/onnxruntime/core/platform/device_discovery_common.cc b/onnxruntime/core/platform/device_discovery_common.cc index decc25cc4f089..c77527801426f 100644 --- a/onnxruntime/core/platform/device_discovery_common.cc +++ b/onnxruntime/core/platform/device_discovery_common.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// This file contains platform-agnostic device discovery implementation. + #include "core/platform/device_discovery.h" #include From 757d27abb6ce6edee59d49927455a9eb252f07f4 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 18 Jun 2025 15:30:32 -0700 Subject: [PATCH 09/39] add TODO for vendor name --- onnxruntime/core/platform/linux/device_discovery.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 14996b730c2e8..25c40db984781 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -67,6 +67,8 @@ OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { gpu_device.vendor_id = ParseStringWithClassicLocale(vendor_id_text); } + // TODO vendor name + // device id { const auto device_id_path = sysfs_path / "device" / "device"; From 34141a163d1897bc4839157edc827c905ca3d216 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 18 Jun 2025 15:44:14 -0700 Subject: [PATCH 10/39] add default impl, add todo comment for apple --- onnxruntime/core/platform/apple/device_discovery.cc | 2 ++ onnxruntime/core/platform/device_discovery_default.cc | 11 +++++++++++ 2 files 
changed, 13 insertions(+) diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc index 905306ef67b6f..7313d1326a7e3 100644 --- a/onnxruntime/core/platform/apple/device_discovery.cc +++ b/onnxruntime/core/platform/apple/device_discovery.cc @@ -7,6 +7,8 @@ namespace onnxruntime { std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { std::unordered_set devices; + // TODO implement + // get CPU devices // get GPU devices diff --git a/onnxruntime/core/platform/device_discovery_default.cc b/onnxruntime/core/platform/device_discovery_default.cc index d194bed477a31..62d7f8f2a77b8 100644 --- a/onnxruntime/core/platform/device_discovery_default.cc +++ b/onnxruntime/core/platform/device_discovery_default.cc @@ -1,2 +1,13 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. + +#include "core/platform/device_discovery.h" + +namespace onnxruntime { + +std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { + // This is a default implementation which does not try to discover anything. 
+ return {}; +} + +} // namespace onnxruntime From cc25212988b4c76b71e4205d6f955e1b70e263c9 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 26 Jun 2025 19:27:33 -0700 Subject: [PATCH 11/39] clean up code for linux gpu discovery --- .../core/platform/linux/device_discovery.cc | 78 +++++++++++-------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 25c40db984781..8ec8c046ec1b0 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -30,23 +30,42 @@ OrtHardwareDevice GetCpuDevice() { return cpu_device; } -bool ParseGpuSysfsPath(const fs::path& sysfs_path, size_t& idx) { - const auto filename = sysfs_path.filename(); - const auto filename_str = std::string_view{filename.native()}; - - // Look for a filename matching "cardN". N is a number. - constexpr std::string_view prefix = "card"; - if (filename_str.find(prefix) != 0) { - return false; +std::vector DetectGpuSysfsPaths() { + const fs::path sysfs_class_drm_path = "/sys/class/drm"; + + if (!fs::exists(sysfs_class_drm_path)) { + return {}; } - size_t parsed_idx{}; - if (!TryParseStringWithClassicLocale(filename_str.substr(prefix.size()), parsed_idx)) { - return false; + const auto detect_card_path = [](const fs::path& sysfs_path, size_t& idx) -> bool { + const auto filename = sysfs_path.filename(); + const auto filename_str = std::string_view{filename.native()}; + + // Look for a filename matching "cardN". N is a number. 
+ constexpr std::string_view prefix = "card"; + if (filename_str.find(prefix) != 0) { + return false; + } + + size_t parsed_idx{}; + if (!TryParseStringWithClassicLocale(filename_str.substr(prefix.size()), parsed_idx)) { + return false; + } + + idx = parsed_idx; + return true; + }; + + std::vector gpu_sysfs_paths{}; + for (const auto& dir_item : fs::directory_iterator{sysfs_class_drm_path}) { + auto dir_item_path = dir_item.path(); + + if (size_t idx{}; detect_card_path(dir_item_path, idx)) { + gpu_sysfs_paths.emplace_back(std::move(dir_item_path)); + } } - idx = parsed_idx; - return true; + return gpu_sysfs_paths; } std::string ReadFileContents(const fs::path& file_path) { @@ -57,14 +76,19 @@ std::string ReadFileContents(const fs::path& file_path) { return contents; } +template +ValueType ReadValueFromFile(const fs::path& file_path) { + const auto file_text = utils::TrimString(ReadFileContents(file_path)); + return ParseStringWithClassicLocale(file_text); +} + OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { OrtHardwareDevice gpu_device{}; // vendor id { const auto vendor_id_path = sysfs_path / "device" / "vendor"; - const auto vendor_id_text = utils::TrimString(ReadFileContents(vendor_id_path)); - gpu_device.vendor_id = ParseStringWithClassicLocale(vendor_id_text); + gpu_device.vendor_id = ReadValueFromFile(vendor_id_path); } // TODO vendor name @@ -72,11 +96,10 @@ OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { // device id { const auto device_id_path = sysfs_path / "device" / "device"; - const auto device_id_text = utils::TrimString(ReadFileContents(device_id_path)); - gpu_device.device_id = ParseStringWithClassicLocale(device_id_text); + gpu_device.device_id = ReadValueFromFile(device_id_path); } - // TODO metadata["Discrete"] + // TODO metadata? e.g., is the device discrete? 
gpu_device.type = OrtHardwareDeviceType_GPU; @@ -84,21 +107,13 @@ OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { } std::vector GetGpuDevices() { + const auto gpu_sysfs_paths = DetectGpuSysfsPaths(); std::vector gpu_devices{}; + gpu_devices.reserve(gpu_sysfs_paths.size()); - const fs::path sysfs_class_drm_path = "/sys/class/drm"; - - if (!fs::exists(sysfs_class_drm_path)) { - return gpu_devices; - } - - for (const auto& dir_item : fs::directory_iterator{sysfs_class_drm_path}) { - const auto& dir_item_path = dir_item.path(); - - if (size_t idx{}; ParseGpuSysfsPath(dir_item_path, idx)) { - auto gpu_device = GetGpuDevice(dir_item_path); - gpu_devices.emplace_back(std::move(gpu_device)); - } + for (const auto& gpu_sysfs_path : gpu_sysfs_paths) { + auto gpu_device = GetGpuDevice(gpu_sysfs_path); + gpu_devices.emplace_back(std::move(gpu_device)); } return gpu_devices; @@ -120,6 +135,7 @@ std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor } // get NPU devices + // TODO figure out how to discover these return devices; } From 04b0758e233bfe28fd755f00d7d4bf4be221fd65 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 27 Jun 2025 18:59:00 -0700 Subject: [PATCH 12/39] add card_idx to linux gpu metadata --- .../core/platform/linux/device_discovery.cc | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 8ec8c046ec1b0..91538463b298c 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -30,14 +30,19 @@ OrtHardwareDevice GetCpuDevice() { return cpu_device; } -std::vector DetectGpuSysfsPaths() { +struct GpuSysfsPathInfo { + size_t card_idx; + fs::path path; +}; + +std::vector DetectGpuSysfsPaths() { const fs::path sysfs_class_drm_path = "/sys/class/drm"; if (!fs::exists(sysfs_class_drm_path)) { 
return {}; } - const auto detect_card_path = [](const fs::path& sysfs_path, size_t& idx) -> bool { + const auto detect_card_path = [](const fs::path& sysfs_path, size_t& card_idx) -> bool { const auto filename = sysfs_path.filename(); const auto filename_str = std::string_view{filename.native()}; @@ -47,21 +52,24 @@ std::vector DetectGpuSysfsPaths() { return false; } - size_t parsed_idx{}; - if (!TryParseStringWithClassicLocale(filename_str.substr(prefix.size()), parsed_idx)) { + size_t parsed_card_idx{}; + if (!TryParseStringWithClassicLocale(filename_str.substr(prefix.size()), parsed_card_idx)) { return false; } - idx = parsed_idx; + card_idx = parsed_card_idx; return true; }; - std::vector gpu_sysfs_paths{}; + std::vector gpu_sysfs_paths{}; for (const auto& dir_item : fs::directory_iterator{sysfs_class_drm_path}) { auto dir_item_path = dir_item.path(); - if (size_t idx{}; detect_card_path(dir_item_path, idx)) { - gpu_sysfs_paths.emplace_back(std::move(dir_item_path)); + if (size_t card_idx{}; detect_card_path(dir_item_path, card_idx)) { + GpuSysfsPathInfo path_info{}; + path_info.card_idx = card_idx; + path_info.path = std::move(dir_item_path); + gpu_sysfs_paths.emplace_back(std::move(path_info)); } } @@ -82,8 +90,9 @@ ValueType ReadValueFromFile(const fs::path& file_path) { return ParseStringWithClassicLocale(file_text); } -OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { +OrtHardwareDevice GetGpuDevice(const GpuSysfsPathInfo& path_info) { OrtHardwareDevice gpu_device{}; + const auto& sysfs_path = path_info.path; // vendor id { @@ -99,7 +108,9 @@ OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { gpu_device.device_id = ReadValueFromFile(device_id_path); } - // TODO metadata? e.g., is the device discrete? + // metadata + gpu_device.metadata.Add("card_idx", MakeString(path_info.card_idx)); + // TODO is card discrete? 
gpu_device.type = OrtHardwareDeviceType_GPU; @@ -107,12 +118,12 @@ OrtHardwareDevice GetGpuDevice(const fs::path& sysfs_path) { } std::vector GetGpuDevices() { - const auto gpu_sysfs_paths = DetectGpuSysfsPaths(); + const auto gpu_sysfs_path_infos = DetectGpuSysfsPaths(); std::vector gpu_devices{}; - gpu_devices.reserve(gpu_sysfs_paths.size()); + gpu_devices.reserve(gpu_sysfs_path_infos.size()); - for (const auto& gpu_sysfs_path : gpu_sysfs_paths) { - auto gpu_device = GetGpuDevice(gpu_sysfs_path); + for (const auto& gpu_sysfs_path_info : gpu_sysfs_path_infos) { + auto gpu_device = GetGpuDevice(gpu_sysfs_path_info); gpu_devices.emplace_back(std::move(gpu_device)); } From f03f2294555626e3689cb19b572be7813ca882ee Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 27 Jun 2025 19:00:04 -0700 Subject: [PATCH 13/39] update OrtKeyValuePairs - add copy/move, make data members private, keep entries sorted --- .../core/platform/device_discovery_common.cc | 2 +- onnxruntime/core/session/abi_devices.h | 5 +- .../core/session/abi_key_value_pairs.h | 93 ++++++++++++++----- .../core/session/allocator_adapters.cc | 42 +++++---- onnxruntime/core/session/onnxruntime_c_api.cc | 9 +- .../core/session/provider_policy_context.cc | 4 +- onnxruntime/core/session/utils.cc | 4 +- 7 files changed, 103 insertions(+), 56 deletions(-) diff --git a/onnxruntime/core/platform/device_discovery_common.cc b/onnxruntime/core/platform/device_discovery_common.cc index c77527801426f..2adc556899db3 100644 --- a/onnxruntime/core/platform/device_discovery_common.cc +++ b/onnxruntime/core/platform/device_discovery_common.cc @@ -26,7 +26,7 @@ const std::unordered_set& DeviceDiscovery::GetDevices() { << ", vendor:" << ortdevice.vendor << ", type:" << std::dec << static_cast(ortdevice.type) << ", metadata: ["; - for (auto& [key, value] : ortdevice.metadata.entries) { + for (auto& [key, value] : ortdevice.metadata.Entries()) { oss << key << "=" << value << ", "; } oss 
<< "]}"; diff --git a/onnxruntime/core/session/abi_devices.h b/onnxruntime/core/session/abi_devices.h index 06041eb0086ac..a1a284a86ac1d 100644 --- a/onnxruntime/core/session/abi_devices.h +++ b/onnxruntime/core/session/abi_devices.h @@ -22,7 +22,7 @@ struct OrtHardwareDevice { onnxruntime::HashCombine(hd.vendor_id, h); onnxruntime::HashCombine(hd.vendor, h); onnxruntime::HashCombine(hd.type, h); - for (const auto& [key, value] : hd.metadata.entries) { + for (const auto& [key, value] : hd.metadata.Entries()) { onnxruntime::HashCombine(key, h); onnxruntime::HashCombine(value, h); } @@ -47,8 +47,7 @@ struct equal_to { lhs.vendor_id == rhs.vendor_id && lhs.device_id == rhs.device_id && lhs.vendor == rhs.vendor && - lhs.metadata.keys == rhs.metadata.keys && - lhs.metadata.values == rhs.metadata.values; + lhs.metadata.Entries() == rhs.metadata.Entries(); } }; } // namespace std diff --git a/onnxruntime/core/session/abi_key_value_pairs.h b/onnxruntime/core/session/abi_key_value_pairs.h index 150575b3a9efc..5794530cbaf51 100644 --- a/onnxruntime/core/session/abi_key_value_pairs.h +++ b/onnxruntime/core/session/abi_key_value_pairs.h @@ -4,20 +4,45 @@ #pragma once #include +#include #include -#include #include +#include + +#include "gsl/gsl" struct OrtKeyValuePairs { - std::unordered_map entries; - // members to make returning all key/value entries via the C API easier - std::vector keys; - std::vector values; + OrtKeyValuePairs() = default; + + OrtKeyValuePairs(const OrtKeyValuePairs& other) { + CopyFromMap(other.entries_); + } + + OrtKeyValuePairs(OrtKeyValuePairs&& other) : OrtKeyValuePairs{} { + swap(*this, other); + } + + OrtKeyValuePairs& operator=(OrtKeyValuePairs other) { // handles copy and move assignment + swap(*this, other); + return *this; + } + + friend void swap(OrtKeyValuePairs& a, OrtKeyValuePairs& b) { + using std::swap; + swap(a.entries_, b.entries_); + swap(a.keys_, b.keys_); + swap(a.values_, b.values_); + } - void Copy(const std::unordered_map& src) { 
- entries = src; + void CopyFromMap(std::map src) { + entries_ = std::move(src); Sync(); } + + void CopyFromMap(const std::unordered_map& src) { + CopyFromMap(std::map(src.begin(), src.end())); + } + void Add(const char* key, const char* value) { // ignore if either are nullptr. if (key && value) { @@ -30,12 +55,12 @@ struct OrtKeyValuePairs { return; } - auto iter_inserted = entries.insert({key, value}); + auto iter_inserted = entries_.insert({key, value}); bool inserted = iter_inserted.second; if (inserted) { const auto& entry = *iter_inserted.first; - keys.push_back(entry.first.c_str()); - values.push_back(entry.second.c_str()); + keys_.push_back(entry.first.c_str()); + values_.push_back(entry.second.c_str()); } else { // rebuild is easier and changing an entry is not expected to be a common case. Sync(); @@ -48,27 +73,47 @@ struct OrtKeyValuePairs { return; } - auto iter = entries.find(key); - if (iter != entries.end()) { - auto key_iter = std::find(keys.begin(), keys.end(), iter->first.c_str()); - // there should only ever be one matching entry, and keys and values should be in sync - if (key_iter != keys.end()) { - auto idx = std::distance(keys.begin(), key_iter); - keys.erase(key_iter); - values.erase(values.begin() + idx); + auto iter = entries_.find(key); + if (iter != entries_.end()) { + auto key_iter = std::find(keys_.begin(), keys_.end(), iter->first.c_str()); + // there should only ever be one matching entry, and keys_ and values_ should be in sync + if (key_iter != keys_.end()) { + auto idx = std::distance(keys_.begin(), key_iter); + keys_.erase(key_iter); + values_.erase(values_.begin() + idx); } - entries.erase(iter); + entries_.erase(iter); } } + const std::map& Entries() const { + return entries_; + } + + gsl::span Keys() const { + return keys_; + } + + gsl::span Values() const { + return values_; + } + private: void Sync() { - keys.clear(); - values.clear(); - for (const auto& entry : entries) { - keys.push_back(entry.first.c_str()); - 
values.push_back(entry.second.c_str()); + keys_.clear(); + values_.clear(); + for (const auto& entry : entries_) { + keys_.push_back(entry.first.c_str()); + values_.push_back(entry.second.c_str()); } } + + // Note: Use std::map so that we can iterate through entries in a deterministic order. + std::map entries_; + + // members to make returning all key/value entries via the C API easier + // Note: The elements point to strings owned by `entries_`. + std::vector keys_; + std::vector values_; }; diff --git a/onnxruntime/core/session/allocator_adapters.cc b/onnxruntime/core/session/allocator_adapters.cc index 5d1f84ba96cf2..9747f4892bdf8 100644 --- a/onnxruntime/core/session/allocator_adapters.cc +++ b/onnxruntime/core/session/allocator_adapters.cc @@ -40,7 +40,7 @@ OrtAllocatorImplWrappingIAllocator::OrtAllocatorImplWrappingIAllocator(onnxrunti API_IMPL_BEGIN auto kvp = std::make_unique(); auto stats_map = static_cast(this_)->Stats(); - kvp->Copy(stats_map); + kvp->CopyFromMap(stats_map); *stats = reinterpret_cast(kvp.release()); return nullptr; API_IMPL_END @@ -120,25 +120,27 @@ void IAllocatorImplWrappingOrtAllocator::GetStats(AllocatorStats* stats) { std::unique_ptr kvp_guard(&kvps, release_fn); - for (size_t i = 0; i < kvps->keys.size(); ++i) { - if (strcmp(kvps->keys[i], "Limit") == 0) { - stats->bytes_limit = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "InUse") == 0) { - stats->bytes_in_use = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "TotalAllocated") == 0) { - stats->total_allocated_bytes = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "MaxInUse") == 0) { - stats->max_bytes_in_use = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "NumAllocs") == 0) { - stats->num_allocs = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "NumReserves") == 0) { - stats->num_reserves = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "NumArenaExtensions") == 0) { - 
stats->num_arena_extensions = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "NumArenaShrinkages") == 0) { - stats->num_arena_shrinkages = std::stoll(kvps->values[i]); - } else if (strcmp(kvps->keys[i], "MaxAllocSize") == 0) { - stats->max_alloc_size = std::stoll(kvps->values[i]); + const auto keys = kvps->Keys(), values = kvps->Values(); + + for (size_t i = 0; i < keys.size(); ++i) { + if (strcmp(keys[i], "Limit") == 0) { + stats->bytes_limit = std::stoll(values[i]); + } else if (strcmp(keys[i], "InUse") == 0) { + stats->bytes_in_use = std::stoll(values[i]); + } else if (strcmp(keys[i], "TotalAllocated") == 0) { + stats->total_allocated_bytes = std::stoll(values[i]); + } else if (strcmp(keys[i], "MaxInUse") == 0) { + stats->max_bytes_in_use = std::stoll(values[i]); + } else if (strcmp(keys[i], "NumAllocs") == 0) { + stats->num_allocs = std::stoll(values[i]); + } else if (strcmp(keys[i], "NumReserves") == 0) { + stats->num_reserves = std::stoll(values[i]); + } else if (strcmp(keys[i], "NumArenaExtensions") == 0) { + stats->num_arena_extensions = std::stoll(values[i]); + } else if (strcmp(keys[i], "NumArenaShrinkages") == 0) { + stats->num_arena_shrinkages = std::stoll(values[i]); + } else if (strcmp(keys[i], "MaxAllocSize") == 0) { + stats->max_alloc_size = std::stoll(values[i]); } } } diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index c18a69265e110..5d1f7f9c7bbc9 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2911,7 +2911,8 @@ ORT_API(void, OrtApis::AddKeyValuePair, _In_ OrtKeyValuePairs* kvps, ORT_API(const char*, OrtApis::GetKeyValue, _In_ const OrtKeyValuePairs* kvps, _In_ const char* key) { const char* value = nullptr; - if (auto entry = kvps->entries.find(key); entry != kvps->entries.end()) { + const auto& entries = kvps->Entries(); + if (auto entry = entries.find(key); entry != entries.end()) { value = 
entry->second.c_str(); } @@ -2920,9 +2921,9 @@ ORT_API(const char*, OrtApis::GetKeyValue, _In_ const OrtKeyValuePairs* kvps, _I ORT_API(void, OrtApis::GetKeyValuePairs, _In_ const OrtKeyValuePairs* kvps, _Outptr_ const char* const** keys, _Outptr_ const char* const** values, _Out_ size_t* num_entries) { - *keys = kvps->keys.data(); - *values = kvps->values.data(); - *num_entries = kvps->entries.size(); + *keys = kvps->Keys().data(); + *values = kvps->Values().data(); + *num_entries = kvps->Entries().size(); } ORT_API(void, OrtApis::RemoveKeyValuePair, _Frees_ptr_opt_ OrtKeyValuePairs* kvps, _In_ const char* key) { diff --git a/onnxruntime/core/session/provider_policy_context.cc b/onnxruntime/core/session/provider_policy_context.cc index edd937c870260..8a8840278a769 100644 --- a/onnxruntime/core/session/provider_policy_context.cc +++ b/onnxruntime/core/session/provider_policy_context.cc @@ -30,7 +30,7 @@ bool IsDiscreteDevice(const OrtEpDevice* d) { return false; } - const auto& entries = d->device->metadata.entries; + const auto& entries = d->device->metadata.Entries(); if (auto it = entries.find("Discrete"); it != entries.end()) { return it->second == "1"; } @@ -303,7 +303,7 @@ Status ProviderPolicyContext::AddEpDefaultOptionsToSession(InferenceSession& ses auto& config_options = sess.GetMutableSessionOptions().config_options; for (auto device : devices) { const std::string ep_options_prefix = OrtSessionOptions::GetProviderOptionPrefix(device->ep_name.c_str()); - for (const auto& [key, value] : device->ep_options.entries) { + for (const auto& [key, value] : device->ep_options.Entries()) { const std::string option_key = ep_options_prefix + key; // preserve user-provided options as they override any defaults the EP factory specified earlier if (config_options.configurations.find(option_key) == config_options.configurations.end()) { diff --git a/onnxruntime/core/session/utils.cc b/onnxruntime/core/session/utils.cc index d0f2e862d61d9..69039beb49363 100644 --- 
a/onnxruntime/core/session/utils.cc +++ b/onnxruntime/core/session/utils.cc @@ -60,7 +60,7 @@ Status TestAutoSelectEPsImpl(const Environment& env, InferenceSession& sess, con // add ep_options to SessionOptions with prefix. // preserve any user provided values. const std::string ep_options_prefix = OrtSessionOptions::GetProviderOptionPrefix(ep_device->ep_name.c_str()); - for (const auto& [key, value] : ep_device->ep_options.entries) { + for (const auto& [key, value] : ep_device->ep_options.Entries()) { auto prefixed_key = ep_options_prefix + key; if (session_options.config_options.configurations.count(key) == 0) { // add the default value with prefix @@ -353,7 +353,7 @@ Status CreateIExecutionProviderFactoryForEpDevices(const Environment& env, // first add the default values with prefix followed by user specified values so those win const std::string prefix = OrtSessionOptions::GetProviderOptionPrefix(ep_device->ep_name.c_str()); auto& config_options = session_options.config_options; - for (const auto& [key, value] : ep_device->ep_options.entries) { + for (const auto& [key, value] : ep_device->ep_options.Entries()) { ORT_RETURN_IF_ERROR(config_options.AddConfigEntry((prefix + key).c_str(), value.c_str())); } From c089033a0dd7850b67564b4ba2bac0e167626612 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Sat, 28 Jun 2025 16:22:34 -0700 Subject: [PATCH 14/39] save work - apple cpu discovery --- onnxruntime/core/common/cpuid_info.cc | 46 +++++++++++++++++++ onnxruntime/core/common/cpuid_info.h | 1 + .../core/platform/apple/device_discovery.cc | 24 +++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index d876a88446cd1..d88f8fbc88f23 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -1,6 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. #include "core/common/cpuid_info.h" + +#include + #include "core/common/logging/logging.h" #include "core/common/logging/severity.h" #include "core/platform/check_intel.h" @@ -53,6 +56,14 @@ #endif // _WIN32 +#if defined(__APPLE__) +#if defined(CPUIDINFO_ARCH_ARM) + +#include + +#endif // defined(CPUIDINFO_ARCH_ARM) +#endif // defined(__APPLE__) + #if defined(CPUINFO_SUPPORTED) #include #if defined(CPUIDINFO_ARCH_ARM) @@ -175,6 +186,7 @@ uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { if (vendor == "AuthenticAMD") return 0x1022; if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24); if (vendor.find("NV") == 0) return 0x10DE; + if (vendor == "Apple") return 0x106B; return 0; } @@ -373,6 +385,40 @@ void CPUIDInfo::ArmAppleInit() { } } +std::string CPUIDInfo::GetArmAppleVendor() { + auto get_sysctl_value = [](const char* key) -> std::optional { + size_t value_length{}; + std::string value{}; + + if (sysctlbyname(key, nullptr, &value_length, nullptr, 0) != ENOMEM) { + LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value length with sysctlbyname()."; + return value; + } + + value.resize(value_length); + if (sysctlbyname(key, &value.data(), &value_length, nullptr, 0) != 0) { + LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value with sysctlbyname()."; + } + + return value; + }; + + constexpr auto vendor_key = "machdep.cpu.vendor"; + if (auto vendor = get_sysctl_value(vendor_key); vendor.has_value()) { + return *vendor; + } + + constexpr auto brand_string_key = "machdep.cpu.brand_string"; + if (auto brand_string = get_sysctl_value(brand_string_key); brand_string.has_value()) { + if (brand_string->find("Apple") != std::string::npos) { + return "Apple"; + } + } + + LOGS_DEFAULT(WARNING) << "Unable to determine CPU vendor."; + return ""; +} + #endif // defined(__APPLE__) #endif // defined(CPUIDINFO_ARCH_ARM) diff --git a/onnxruntime/core/common/cpuid_info.h 
b/onnxruntime/core/common/cpuid_info.h index cef50c159a718..1816999bca785 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -158,6 +158,7 @@ class CPUIDInfo { #elif defined(__APPLE__) void ArmAppleInit(); + std::string GetArmAppleVendor(); #endif diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc index 7313d1326a7e3..906e5888464d4 100644 --- a/onnxruntime/core/platform/apple/device_discovery.cc +++ b/onnxruntime/core/platform/apple/device_discovery.cc @@ -3,13 +3,33 @@ #include "core/platform/device_discovery.h" +#include "core/common/cpuid_info.h" + namespace onnxruntime { + +namespace { + +OrtHardwareDevice GetCpuDevice() { + const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); + + OrtHardwareDevice cpu_device{}; + cpu_device.vendor = cpuid_info.GetCPUVendor(); + cpu_device.vendor_id = cpuid_info.GetCPUVendorId(); + cpu_device.device_id = 0; + cpu_device.type = OrtHardwareDeviceType_CPU; + + return cpu_device; +} + +} // namespace + std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { std::unordered_set devices; - // TODO implement - // get CPU devices + devices.insert(GetCpuDevice()); + + // TODO // get GPU devices From 55c3a8c90814920c5b62882b19deaa1dc60bd045 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:32:31 -0700 Subject: [PATCH 15/39] fix up cpu vendor detection --- onnxruntime/core/common/cpuid_info.cc | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index d88f8fbc88f23..66e226e262f3b 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -195,6 +195,9 @@ uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { #if defined(__linux__) void CPUIDInfo::ArmLinuxInit() { + vendor_ = GetArmLinuxVendor(); + 
vendor_id_ = GetVendorId(vendor_); + // Assuming no hyper-threading, no NUMA groups #if defined(CPUINFO_SUPPORTED) if (pytorch_cpuinfo_init_) { @@ -365,6 +368,9 @@ std::string CPUIDInfo::GetArmWindowsVendor() { #elif defined(__APPLE__) // ^ defined(_WIN32) void CPUIDInfo::ArmAppleInit() { + vendor_ = GetArmAppleVendor(); + vendor_id_ = GetVendorId(vendor_); + #if defined(CPUINFO_SUPPORTED) if (pytorch_cpuinfo_init_) { is_hybrid_ = cpuinfo_get_uarchs_count() > 1; @@ -388,16 +394,23 @@ void CPUIDInfo::ArmAppleInit() { std::string CPUIDInfo::GetArmAppleVendor() { auto get_sysctl_value = [](const char* key) -> std::optional { size_t value_length{}; - std::string value{}; - - if (sysctlbyname(key, nullptr, &value_length, nullptr, 0) != ENOMEM) { - LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value length with sysctlbyname()."; - return value; + if (sysctlbyname(key, nullptr, &value_length, nullptr, 0) != 0) { + const auto error = errno; + if (error == ENOENT) { + LOGS_DEFAULT(INFO) << "sysctlbyname() key not found: '" << key << "'"; + } else { + LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value length with sysctlbyname(). " + << "Error: " << error; + } + return std::nullopt; } + std::string value{}; value.resize(value_length); - if (sysctlbyname(key, &value.data(), &value_length, nullptr, 0) != 0) { - LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value with sysctlbyname()."; + if (sysctlbyname(key, value.data(), &value_length, nullptr, 0) != 0) { + const auto error = errno; + LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value with sysctlbyname(). 
" + << "Error: " << error; } return value; From 231d3473961b4ab04b35f74ef52df5f805232ee3 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:36:36 -0700 Subject: [PATCH 16/39] add check for non-zero detected vendor id --- onnxruntime/test/platform/device_discovery_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc index 7e1c43c387ad8..0d78fe6e25909 100644 --- a/onnxruntime/test/platform/device_discovery_test.cc +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -24,6 +24,7 @@ std::vector GetDevicesByType(OrtHardwareDeviceType device_typ TEST(DeviceDiscoveryTest, HasCpuDevice) { const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); ASSERT_GT(cpu_devices.size(), 0); + ASSERT_NE(cpu_devices[0].vendor_id, 0); } } // namespace onnxruntime::test From 9e686ca82dc9725d26e023fc681b922ab5620e5e Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:46:47 -0700 Subject: [PATCH 17/39] fix formatting --- include/onnxruntime/core/common/parse_string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/onnxruntime/core/common/parse_string.h b/include/onnxruntime/core/common/parse_string.h index 6ddf14cf5860d..1a9580184465b 100644 --- a/include/onnxruntime/core/common/parse_string.h +++ b/include/onnxruntime/core/common/parse_string.h @@ -38,7 +38,7 @@ TryParseStringWithClassicLocale(std::string_view str, T& value) { std::from_chars_result conversion_result{}; if constexpr (std::is_integral_v && std::is_unsigned_v) { - // For unsigned integral types, also handle hex values, i.e., those beginning with "0x". + // For unsigned integral types, also handle hex values, i.e., those beginning with "0x". // TODO We could also extend this to other types. For that, we would need to handle negative values. 
const bool has_hex_prefix = str.size() >= 2 && str[0] == '0' && From c53921d2112a8ac19f91185492ced78f8584477c Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 1 Jul 2025 11:03:50 -0700 Subject: [PATCH 18/39] fix build error --- onnxruntime/core/framework/allocator.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc index c2ff70b8e9808..d32176be2c4fe 100644 --- a/onnxruntime/core/framework/allocator.cc +++ b/onnxruntime/core/framework/allocator.cc @@ -31,27 +31,28 @@ Status OrtArenaCfg::FromKeyValuePairs(const OrtKeyValuePairs& kvps, OrtArenaCfg& return Status::OK(); }; - if (auto it = kvps.entries.find(ConfigKeyNames::ArenaExtendStrategy); it != kvps.entries.end()) { + const auto& kvps_entries = kvps.Entries(); + if (auto it = kvps_entries.find(ConfigKeyNames::ArenaExtendStrategy); it != kvps_entries.end()) { ORT_RETURN_IF_ERROR(from_string(it->first, it->second, cfg.arena_extend_strategy)); } - if (auto it = kvps.entries.find(ConfigKeyNames::InitialChunkSizeBytes); it != kvps.entries.end()) { + if (auto it = kvps_entries.find(ConfigKeyNames::InitialChunkSizeBytes); it != kvps_entries.end()) { ORT_RETURN_IF_ERROR(from_string(it->first, it->second, cfg.initial_chunk_size_bytes)); } - if (auto it = kvps.entries.find(ConfigKeyNames::MaxDeadBytesPerChunk); it != kvps.entries.end()) { + if (auto it = kvps_entries.find(ConfigKeyNames::MaxDeadBytesPerChunk); it != kvps_entries.end()) { ORT_RETURN_IF_ERROR(from_string(it->first, it->second, cfg.max_dead_bytes_per_chunk)); } - if (auto it = kvps.entries.find(ConfigKeyNames::InitialGrowthChunkSizeBytes); it != kvps.entries.end()) { + if (auto it = kvps_entries.find(ConfigKeyNames::InitialGrowthChunkSizeBytes); it != kvps_entries.end()) { ORT_RETURN_IF_ERROR(from_string(it->first, it->second, cfg.initial_growth_chunk_size_bytes)); } - if (auto it = 
kvps.entries.find(ConfigKeyNames::MaxPowerOfTwoExtendBytes); it != kvps.entries.end()) { + if (auto it = kvps_entries.find(ConfigKeyNames::MaxPowerOfTwoExtendBytes); it != kvps_entries.end()) { ORT_RETURN_IF_ERROR(from_string(it->first, it->second, cfg.max_power_of_two_extend_bytes)); } - if (auto it = kvps.entries.find(ConfigKeyNames::MaxMem); it != kvps.entries.end()) { + if (auto it = kvps_entries.find(ConfigKeyNames::MaxMem); it != kvps_entries.end()) { ORT_RETURN_IF_ERROR(from_string(it->first, it->second, cfg.max_mem)); } From 48c6bb4694da750b89b3e8e505d397e4bbbd6fda Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 1 Jul 2025 17:01:45 -0700 Subject: [PATCH 19/39] hardcoded apple device discovery --- .../core/platform/apple/device_discovery.cc | 83 ++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc index 906e5888464d4..e41abd9143afe 100644 --- a/onnxruntime/core/platform/apple/device_discovery.cc +++ b/onnxruntime/core/platform/apple/device_discovery.cc @@ -3,12 +3,19 @@ #include "core/platform/device_discovery.h" +#include +#include + #include "core/common/cpuid_info.h" +#include "core/common/logging/logging.h" namespace onnxruntime { namespace { +constexpr auto kApplePciVendorId = 0x106B; +constexpr auto kAppleVendorName = "Apple"; + OrtHardwareDevice GetCpuDevice() { const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); @@ -21,6 +28,72 @@ OrtHardwareDevice GetCpuDevice() { return cpu_device; } +std::vector GetGpuDevices() { + std::vector result{}; + + // For now, we assume the existence of one GPU if it is a Mac with Apple Silicon. 
+ // TODO support iOS + // TODO support Intel Macs which may have more than one GPU +#if TARGET_OS_OSX && TARGET_CPU_ARM64 + { + OrtHardwareDevice gpu_device{}; + gpu_device.type = OrtHardwareDeviceType_GPU; + gpu_device.vendor_id = kApplePciVendorId; + gpu_device.vendor = kAppleVendorName; + + result.emplace_back(std::move(gpu_device)); + } +#endif // TARGET_OS_OSX && TARGET_CPU_ARM64 + + return result; +} + +bool HasAppleNeuralEngine() { + // Copied from onnxruntime/core/providers/coreml/builders/helper.cc:HasNeuralEngine(). + bool has_apple_neural_engine = false; + + struct utsname system_info; + uname(&system_info); + LOGS_DEFAULT(VERBOSE) << "Current Apple hardware info: " << system_info.machine; + +#if TARGET_OS_IPHONE + // utsname.machine has device identifier. For example, identifier for iPhone Xs is "iPhone11,2". + // Since Neural Engine is only available for use on A12 and later, major device version in the + // identifier is checked for these models: + // A12: iPhone XS (11,2), iPad Mini - 5th Gen (11,1) + // A12X: iPad Pro - 3rd Gen (8,1) + // For more information, see https://www.theiphonewiki.com/wiki/Models + size_t str_len = strnlen(system_info.machine, onnxruntime::kMaxStrLen); + if (str_len > 4 && strncmp("iPad", system_info.machine, 4) == 0) { + const int major_version = atoi(system_info.machine + 4); + has_apple_neural_engine = major_version >= 8; // There are no devices between iPad 8 and 11. + } else if (str_len > 6 && strncmp("iPhone", system_info.machine, 6) == 0) { + const int major_version = atoi(system_info.machine + 6); + has_apple_neural_engine = major_version >= 11; + } +#elif TARGET_OS_OSX && TARGET_CPU_ARM64 + // Only Mac with arm64 CPU (Apple Silicon) has ANE.
+ has_apple_neural_engine = true; +#endif // #if TARGET_OS_IPHONE + + return has_apple_neural_engine; +} + +std::vector GetNpuDevices() { + std::vector result{}; + + if (HasAppleNeuralEngine()) { + OrtHardwareDevice npu_device{}; + npu_device.type = OrtHardwareDeviceType_NPU; + npu_device.vendor_id = kApplePciVendorId; + npu_device.vendor = kAppleVendorName; + + result.emplace_back(std::move(npu_device)); + } + + return result; +} + } // namespace std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { @@ -29,11 +102,17 @@ std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor // get CPU devices devices.insert(GetCpuDevice()); - // TODO - // get GPU devices + { + auto gpu_devices = GetGpuDevices(); + devices.insert(gpu_devices.begin(), gpu_devices.end()); + } // get NPU devices + { + auto npu_devices = GetNpuDevices(); + devices.insert(npu_devices.begin(), npu_devices.end()); + } return devices; } From 6aa11ab77a469d6cca5556114bee9737ff879cdb Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 1 Jul 2025 17:31:36 -0700 Subject: [PATCH 20/39] fix build issue in onnxruntime_pybind_state.cc --- onnxruntime/python/onnxruntime_pybind_state.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 03db6f069cd75..ef4d2ddd3bc37 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1987,7 +1987,7 @@ void addObjectMethods(py::module& m, ExecutionProviderRegistrationFn ep_registra .def_property_readonly( "metadata", [](OrtHardwareDevice* hw_device) -> std::unordered_map { - return hw_device->metadata.entries; + return hw_device->metadata.Entries(); }, R"pbdoc(Hardware device's metadata as string key/value pairs.)pbdoc"); @@ -2005,13 +2005,13 @@ for model inference.)pbdoc"); .def_property_readonly( "ep_metadata", [](OrtEpDevice* 
ep_device) -> std::unordered_map { - return ep_device->ep_metadata.entries; + return ep_device->ep_metadata.Entries(); }, R"pbdoc(The execution provider's additional metadata for the OrtHardwareDevice.)pbdoc") .def_property_readonly( "ep_options", [](OrtEpDevice* ep_device) -> std::unordered_map { - return ep_device->ep_options.entries; + return ep_device->ep_options.Entries(); }, R"pbdoc(The execution provider's options used to configure the provider to use the OrtHardwareDevice.)pbdoc") .def_property_readonly( From f926cdcb7eff310034e3be043cc94ef07ab59d5f Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 24 Jul 2025 17:21:51 -0700 Subject: [PATCH 21/39] enable logging before ORT logging is up --- onnxruntime/core/common/cpuid_info.cc | 28 ++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 66e226e262f3b..d9edfe78053fa 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #include "core/common/cpuid_info.h" +#include #include #include "core/common/logging/logging.h" @@ -87,6 +88,21 @@ void decodeMIDR(uint32_t midr, uint32_t uarch[1]); namespace onnxruntime { +namespace { + +// Log function that uses ORT logging if available or writes to stderr. +// This enables us to log even before ORT logging has been initialized. 
+[[maybe_unused]] +void LogWarning(std::string_view message) { + if (logging::LoggingManager::HasDefaultLogger()) { + LOGS_DEFAULT(WARNING) << message; + } else { + std::cerr << "onnxruntime cpuid_info warning: " << message << "\n"; + } +} + +} // namespace + #if defined(CPUIDINFO_ARCH_X86) static inline void GetCPUID(int function_id, int data[4]) { // NOLINT @@ -247,7 +263,7 @@ std::string CPUIDInfo::GetArmLinuxVendor() { CpuInfo cpu_info{}; Status parse_status = ParseCpuInfoFile(cpu_info); if (!parse_status.IsOK()) { - LOGS_DEFAULT(WARNING) << "Failed to parse /proc/cpuinfo file. Error: " << parse_status; + LogWarning(MakeString("Failed to parse /proc/cpuinfo file. Error: ", parse_status)); } if (cpu_info.size() > 0) { @@ -397,10 +413,9 @@ std::string CPUIDInfo::GetArmAppleVendor() { if (sysctlbyname(key, nullptr, &value_length, nullptr, 0) != 0) { const auto error = errno; if (error == ENOENT) { - LOGS_DEFAULT(INFO) << "sysctlbyname() key not found: '" << key << "'"; + // key not found } else { - LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value length with sysctlbyname(). " - << "Error: " << error; + LogWarning(MakeString("Failed to get '", key, "' value length with sysctlbyname(). Error: ", error)); } return std::nullopt; } @@ -409,8 +424,7 @@ std::string CPUIDInfo::GetArmAppleVendor() { value.resize(value_length); if (sysctlbyname(key, value.data(), &value_length, nullptr, 0) != 0) { const auto error = errno; - LOGS_DEFAULT(WARNING) << "Failed to get '" << key << "' value with sysctlbyname(). " - << "Error: " << error; + LogWarning(MakeString("Failed to get '", key, "' value with sysctlbyname(). 
Error: ", error)); } return value; @@ -428,7 +442,7 @@ std::string CPUIDInfo::GetArmAppleVendor() { } } - LOGS_DEFAULT(WARNING) << "Unable to determine CPU vendor."; + LogWarning("Unable to determine CPU vendor."); return ""; } From 5b8764e0beee57d45398f1141a3f987f96602d87 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:39:13 -0700 Subject: [PATCH 22/39] debug - dump /proc/cpuinfo file lines as they are read --- onnxruntime/core/platform/linux/cpuinfo.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/core/platform/linux/cpuinfo.cc b/onnxruntime/core/platform/linux/cpuinfo.cc index 51fa3282534ba..f848fc44c3de1 100644 --- a/onnxruntime/core/platform/linux/cpuinfo.cc +++ b/onnxruntime/core/platform/linux/cpuinfo.cc @@ -4,6 +4,7 @@ #include "core/platform/linux/cpuinfo.h" #include +#include // TODO for debugging - remove later #include #include @@ -51,6 +52,7 @@ Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) }; for (std::string line{}; std::getline(in, line);) { + std::cerr << "/proc/cpuinfo line: " << line << "\n"; line = utils::TrimString(line); if (line.empty()) { From d0f9653b268183c9bfc4f3e0e1463570f861b0a8 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:16:05 -0700 Subject: [PATCH 23/39] make /proc/cpuinfo vendor field not mandatory, remove debug output --- onnxruntime/core/platform/linux/cpuinfo.cc | 27 +++++++++++-------- .../test/platform/device_discovery_test.cc | 4 +++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/platform/linux/cpuinfo.cc b/onnxruntime/core/platform/linux/cpuinfo.cc index f848fc44c3de1..f9a45e620bcc8 100644 --- a/onnxruntime/core/platform/linux/cpuinfo.cc +++ b/onnxruntime/core/platform/linux/cpuinfo.cc @@ -4,7 +4,6 @@ #include "core/platform/linux/cpuinfo.h" #include -#include // TODO for debugging - remove later #include #include 
@@ -16,12 +15,14 @@ namespace onnxruntime { namespace { using KeyValuePairs = std::map>; -Status GetValue(const KeyValuePairs& key_value_pairs, std::string_view key, - std::string_view& value) { +bool TryGetValue(const KeyValuePairs& key_value_pairs, std::string_view key, std::string& value) { auto it = key_value_pairs.find(key); - ORT_RETURN_IF(it == key_value_pairs.end(), "Failed to find key: ", key); + if (it == key_value_pairs.end()) { + return false; + } + value = it->second; - return Status::OK(); + return true; } } // namespace @@ -35,14 +36,19 @@ Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) auto add_processor_info = [&]() -> Status { if (!key_value_pairs.empty()) { - std::string_view value{}; CpuInfoFileProcessorInfo processor_info{}; - ORT_RETURN_IF_ERROR(GetValue(key_value_pairs, "processor", value)); - ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value, processor_info.processor)); + { + std::string processor_str{}; + ORT_RETURN_IF_NOT(TryGetValue(key_value_pairs, "processor", processor_str), "Failed to get processor value."); + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(processor_str, processor_info.processor)); + } - ORT_RETURN_IF_ERROR(GetValue(key_value_pairs, "vendor", value)); - processor_info.vendor_id = std::string{value}; + // Try to get a vendor name. + // This approach doesn't always work, e.g., for ARM processors. + if (!TryGetValue(key_value_pairs, "vendor", processor_info.vendor_id)) { + // TODO try something else? 
+ } cpu_info.emplace_back(std::move(processor_info)); @@ -52,7 +58,6 @@ Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) }; for (std::string line{}; std::getline(in, line);) { - std::cerr << "/proc/cpuinfo line: " << line << "\n"; line = utils::TrimString(line); if (line.empty()) { diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc index 0d78fe6e25909..5b837279df8e5 100644 --- a/onnxruntime/test/platform/device_discovery_test.cc +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -24,7 +24,11 @@ std::vector GetDevicesByType(OrtHardwareDeviceType device_typ TEST(DeviceDiscoveryTest, HasCpuDevice) { const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); ASSERT_GT(cpu_devices.size(), 0); +#if defined(__linux__) && (defined(__aarch64__) || defined(__arm__)) + // TODO vendor_id is not properly set for Linux and ARM yet +#else ASSERT_NE(cpu_devices[0].vendor_id, 0); +#endif } } // namespace onnxruntime::test From a56c2ff50474ceb4ef0090024ecc65f0e1ca7c01 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:34:32 -0700 Subject: [PATCH 24/39] update comment --- onnxruntime/test/platform/device_discovery_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc index 5b837279df8e5..80b24228b6d4f 100644 --- a/onnxruntime/test/platform/device_discovery_test.cc +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -25,7 +25,7 @@ TEST(DeviceDiscoveryTest, HasCpuDevice) { const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); ASSERT_GT(cpu_devices.size(), 0); #if defined(__linux__) && (defined(__aarch64__) || defined(__arm__)) - // TODO vendor_id is not properly set for Linux and ARM yet + // TODO vendor_id is not properly set for ARM Linux yet #else 
ASSERT_NE(cpu_devices[0].vendor_id, 0); #endif From 5b721eca455a84b3d38bd2d74beba00bd1b404ef Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 28 Jul 2025 16:50:31 -0700 Subject: [PATCH 25/39] add some ARM CPU vendor detection support --- onnxruntime/core/common/cpuid_info.cc | 9 ++-- onnxruntime/core/platform/linux/cpuinfo.cc | 60 +++++++++++++++++++--- onnxruntime/core/platform/linux/cpuinfo.h | 6 +-- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 7fe7647b72f42..ef6d8b9292e75 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -198,11 +198,14 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) { #endif // defined(CPUIDINFO_ARCH_X86) uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { - if (vendor == "GenuineIntel") return 0x8086; - if (vendor == "AuthenticAMD") return 0x1022; + if (vendor == "Intel" || vendor == "GenuineIntel") return 0x8086; + if (vendor == "AMD" || vendor == "AuthenticAMD") return 0x1022; if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24); - if (vendor.find("NV") == 0) return 0x10DE; + if (vendor == "Nvidia" || vendor.find("NV") == 0) return 0x10DE; if (vendor == "Apple") return 0x106B; + if (vendor == "ARM") return 0x13B5; + + LogWarning(MakeString("Unable to determine vendor ID from vendor string: ", vendor)); return 0; } diff --git a/onnxruntime/core/platform/linux/cpuinfo.cc b/onnxruntime/core/platform/linux/cpuinfo.cc index f9a45e620bcc8..13160d201ca89 100644 --- a/onnxruntime/core/platform/linux/cpuinfo.cc +++ b/onnxruntime/core/platform/linux/cpuinfo.cc @@ -24,14 +24,52 @@ bool TryGetValue(const KeyValuePairs& key_value_pairs, std::string_view key, std value = it->second; return true; } + +std::string ArmCpuImplementerIdToVendorName(uint32_t implementer_id) { + // ARM CPU implementer ids are copied from 
here: + // https://github.com/torvalds/linux/blob/038d61fd642278bab63ee8ef722c50d10ab01e8f/arch/arm64/include/asm/cputype.h#L54-L64 + // https://github.com/torvalds/linux/blob/038d61fd642278bab63ee8ef722c50d10ab01e8f/arch/arm/include/asm/cputype.h#L65-L68 + + switch (implementer_id) { + case 0x41: + return "ARM"; + case 0x42: + return "Broadcom"; + case 0x44: + return "DEC"; + case 0x43: + return "Cavium"; + case 0x46: + return "Fujitsu"; + case 0x48: + return "HiSilicon"; + case 0x4E: + return "Nvidia"; + case 0x50: + return "APM"; + case 0x51: + return "Qualcomm"; + case 0x61: + return "Apple"; + case 0x69: + return "Intel"; + case 0x6D: + return "Microsoft"; + case 0xC0: + return "Ampere"; + + default: + return "unknown"; + } +} } // namespace -Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) { +Status ParseCpuInfoFile(const std::string& cpu_info_file, std::vector& cpu_infos_out) { std::ifstream in{cpu_info_file}; ORT_RETURN_IF_NOT(in, "Failed to open file: ", cpu_info_file); - CpuInfo cpu_info{}; + std::vector cpu_infos{}; KeyValuePairs key_value_pairs{}; auto add_processor_info = [&]() -> Status { @@ -44,13 +82,19 @@ Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(processor_str, processor_info.processor)); } - // Try to get a vendor name. - // This approach doesn't always work, e.g., for ARM processors. - if (!TryGetValue(key_value_pairs, "vendor", processor_info.vendor_id)) { - // TODO try something else? + // Try to get a vendor string. 
+ if (std::string vendor_id; + TryGetValue(key_value_pairs, "vendor_id", vendor_id)) { + processor_info.vendor = std::move(vendor_id); + } else if (std::string implementer_id_str; + TryGetValue(key_value_pairs, "CPU implementer", implementer_id_str)) { + const auto implementer_id = ParseStringWithClassicLocale(implementer_id_str); + processor_info.vendor = ArmCpuImplementerIdToVendorName(implementer_id); + } else { + processor_info.vendor = "unknown"; } - cpu_info.emplace_back(std::move(processor_info)); + cpu_infos.emplace_back(std::move(processor_info)); key_value_pairs.clear(); } @@ -73,7 +117,7 @@ Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info_out) ORT_RETURN_IF_ERROR(add_processor_info()); - cpu_info_out = std::move(cpu_info); + cpu_infos_out = std::move(cpu_infos); return Status::OK(); } diff --git a/onnxruntime/core/platform/linux/cpuinfo.h b/onnxruntime/core/platform/linux/cpuinfo.h index 825ca8c4e4e6b..d56541d76b1b3 100644 --- a/onnxruntime/core/platform/linux/cpuinfo.h +++ b/onnxruntime/core/platform/linux/cpuinfo.h @@ -13,14 +13,12 @@ namespace onnxruntime { struct CpuInfoFileProcessorInfo { size_t processor; - std::string vendor_id; + std::string vendor; // There are plenty of other fields. We can add more if needed. 
}; -using CpuInfo = std::vector; - -Status ParseCpuInfoFile(const std::string& cpu_info_file, CpuInfo& cpu_info); +Status ParseCpuInfoFile(const std::string& cpu_info_file, std::vector& cpu_infos); inline Status ParseCpuInfoFile(CpuInfo& cpu_info) { return ParseCpuInfoFile("/proc/cpuinfo", cpu_info); From c31e336942641c66457d419012a33fead26a6018 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 29 Jul 2025 22:38:59 +0000 Subject: [PATCH 26/39] fix build issues --- onnxruntime/core/common/cpuid_info.cc | 8 ++++---- onnxruntime/core/platform/linux/cpuinfo.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index ef6d8b9292e75..feedb7c7965ca 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -264,15 +264,15 @@ void CPUIDInfo::ArmLinuxInit() { std::string CPUIDInfo::GetArmLinuxVendor() { std::string vendor{}; - CpuInfo cpu_info{}; - Status parse_status = ParseCpuInfoFile(cpu_info); + std::vector cpu_infos{}; + Status parse_status = ParseCpuInfoFile(cpu_infos); if (!parse_status.IsOK()) { LogWarning(MakeString("Failed to parse /proc/cpuinfo file. 
Error: ", parse_status)); } - if (cpu_info.size() > 0) { + if (cpu_infos.size() > 0) { // just use the vendor from the first processor's information - vendor = cpu_info[0].vendor_id; + vendor = cpu_infos[0].vendor; } return vendor; diff --git a/onnxruntime/core/platform/linux/cpuinfo.h b/onnxruntime/core/platform/linux/cpuinfo.h index d56541d76b1b3..1859d167b441a 100644 --- a/onnxruntime/core/platform/linux/cpuinfo.h +++ b/onnxruntime/core/platform/linux/cpuinfo.h @@ -20,7 +20,7 @@ struct CpuInfoFileProcessorInfo { Status ParseCpuInfoFile(const std::string& cpu_info_file, std::vector& cpu_infos); -inline Status ParseCpuInfoFile(CpuInfo& cpu_info) { +inline Status ParseCpuInfoFile(std::vector& cpu_info) { return ParseCpuInfoFile("/proc/cpuinfo", cpu_info); } From 9ae765ab284a45a816f04e0718e144cce936f9c5 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:37:38 -0700 Subject: [PATCH 27/39] replace /proc/cpuinfo parsing with cpuinfo library usage --- cmake/onnxruntime_common.cmake | 9 - onnxruntime/core/common/cpuid_info.cc | 124 +--------- onnxruntime/core/common/cpuid_info.h | 66 ++--- onnxruntime/core/common/cpuid_info_vendor.cc | 244 +++++++++++++++++++ onnxruntime/core/platform/linux/cpuinfo.cc | 124 ---------- onnxruntime/core/platform/linux/cpuinfo.h | 27 -- 6 files changed, 288 insertions(+), 306 deletions(-) create mode 100644 onnxruntime/core/common/cpuid_info_vendor.cc delete mode 100644 onnxruntime/core/platform/linux/cpuinfo.cc delete mode 100644 onnxruntime/core/platform/linux/cpuinfo.h diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 172de95aa6c19..4b312b07e186e 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -55,13 +55,6 @@ else() "${ONNXRUNTIME_ROOT}/core/platform/posix/stacktrace.cc" ) - if(LINUX) - list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/linux/cpuinfo.h" - 
"${ONNXRUNTIME_ROOT}/core/platform/linux/cpuinfo.cc" - ) - endif() - # logging files if (onnxruntime_USE_SYSLOG) list(APPEND onnxruntime_common_src_patterns @@ -250,8 +243,6 @@ endif() if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64) # Link cpuinfo if supported - # Using it mainly in ARM with Android. - # Its functionality in detecting x86 cpu features are lacking, so is support for Windows. if (CPUINFO_SUPPORTED) onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo) list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME}) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index feedb7c7965ca..6c66047b4b36a 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -43,8 +43,6 @@ #endif // ARM -#include "core/platform/linux/cpuinfo.h" - #endif // Linux #if _WIN32 @@ -88,12 +86,7 @@ void decodeMIDR(uint32_t midr, uint32_t uarch[1]); namespace onnxruntime { -namespace { - -// Log function that uses ORT logging if available or writes to stderr. -// This enables us to log even before ORT logging has been initialized. 
-[[maybe_unused]] -void LogWarning(std::string_view message) { +void CPUIDInfo::LogEarlyWarning(std::string_view message) { if (logging::LoggingManager::HasDefaultLogger()) { LOGS_DEFAULT(WARNING) << message; } else { @@ -101,8 +94,6 @@ void LogWarning(std::string_view message) { } } -} // namespace - #if defined(CPUIDINFO_ARCH_X86) static inline void GetCPUID(int function_id, int data[4]) { // NOLINT @@ -137,9 +128,6 @@ void CPUIDInfo::X86Init() { int data[4] = {-1}; GetCPUID(0, data); - vendor_ = GetX86Vendor(data); - vendor_id_ = GetVendorId(vendor_); - int num_IDs = data[0]; if (num_IDs >= 1) { GetCPUID(1, data); @@ -187,36 +175,13 @@ void CPUIDInfo::X86Init() { } } -std::string CPUIDInfo::GetX86Vendor(int32_t* data) { - char vendor[sizeof(int32_t) * 3 + 1]{}; - *reinterpret_cast(vendor + 0) = data[1]; - *reinterpret_cast(vendor + 4) = data[3]; - *reinterpret_cast(vendor + 8) = data[2]; - return vendor; -} - #endif // defined(CPUIDINFO_ARCH_X86) -uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { - if (vendor == "Intel" || vendor == "GenuineIntel") return 0x8086; - if (vendor == "AMD" || vendor == "AuthenticAMD") return 0x1022; - if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24); - if (vendor == "Nvidia" || vendor.find("NV") == 0) return 0x10DE; - if (vendor == "Apple") return 0x106B; - if (vendor == "ARM") return 0x13B5; - - LogWarning(MakeString("Unable to determine vendor ID from vendor string: ", vendor)); - return 0; -} - #if defined(CPUIDINFO_ARCH_ARM) #if defined(__linux__) void CPUIDInfo::ArmLinuxInit() { - vendor_ = GetArmLinuxVendor(); - vendor_id_ = GetVendorId(vendor_); - // Assuming no hyper-threading, no NUMA groups #if defined(CPUINFO_SUPPORTED) if (pytorch_cpuinfo_init_) { @@ -261,30 +226,9 @@ void CPUIDInfo::ArmLinuxInit() { } } -std::string CPUIDInfo::GetArmLinuxVendor() { - std::string vendor{}; - - std::vector cpu_infos{}; - Status parse_status = ParseCpuInfoFile(cpu_infos); - if 
(!parse_status.IsOK()) { - LogWarning(MakeString("Failed to parse /proc/cpuinfo file. Error: ", parse_status)); - } - - if (cpu_infos.size() > 0) { - // just use the vendor from the first processor's information - vendor = cpu_infos[0].vendor; - } - - return vendor; -} - #elif defined(_WIN32) // ^ defined(__linux__) void CPUIDInfo::ArmWindowsInit() { - // Get the ARM vendor string from the registry - vendor_ = GetArmWindowsVendor(); - vendor_id_ = GetVendorId(vendor_); - // Read MIDR and ID_AA64ISAR1_EL1 register values from Windows registry // There should be one per CPU std::vector midr_values{}, id_aa64isar1_el1_values{}; @@ -376,21 +320,9 @@ void CPUIDInfo::ArmWindowsInit() { #endif // defined(CPUINFO_SUPPORTED) } -std::string CPUIDInfo::GetArmWindowsVendor() { - const int MAX_VALUE_NAME = 256; - const CHAR vendorKey[] = "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; - CHAR vendorVal[MAX_VALUE_NAME] = ""; - unsigned long vendorSize = sizeof(char) * MAX_VALUE_NAME; - ::RegGetValueA(HKEY_LOCAL_MACHINE, vendorKey, "Vendor Identifier", RRF_RT_REG_SZ | RRF_ZEROONFAILURE, nullptr, &vendorVal, &vendorSize); - return vendorVal; -} - #elif defined(__APPLE__) // ^ defined(_WIN32) void CPUIDInfo::ArmAppleInit() { - vendor_ = GetArmAppleVendor(); - vendor_id_ = GetVendorId(vendor_); - #if defined(CPUINFO_SUPPORTED) if (pytorch_cpuinfo_init_) { is_hybrid_ = cpuinfo_get_uarchs_count() > 1; @@ -412,45 +344,6 @@ void CPUIDInfo::ArmAppleInit() { } } -std::string CPUIDInfo::GetArmAppleVendor() { - auto get_sysctl_value = [](const char* key) -> std::optional { - size_t value_length{}; - if (sysctlbyname(key, nullptr, &value_length, nullptr, 0) != 0) { - const auto error = errno; - if (error == ENOENT) { - // key not found - } else { - LogWarning(MakeString("Failed to get '", key, "' value length with sysctlbyname(). 
Error: ", error)); - } - return std::nullopt; - } - - std::string value{}; - value.resize(value_length); - if (sysctlbyname(key, value.data(), &value_length, nullptr, 0) != 0) { - const auto error = errno; - LogWarning(MakeString("Failed to get '", key, "' value with sysctlbyname(). Error: ", error)); - } - - return value; - }; - - constexpr auto vendor_key = "machdep.cpu.vendor"; - if (auto vendor = get_sysctl_value(vendor_key); vendor.has_value()) { - return *vendor; - } - - constexpr auto brand_string_key = "machdep.cpu.brand_string"; - if (auto brand_string = get_sysctl_value(brand_string_key); brand_string.has_value()) { - if (brand_string->find("Apple") != std::string::npos) { - return "Apple"; - } - } - - LogWarning("Unable to determine CPU vendor."); - return ""; -} - #endif // defined(__APPLE__) #endif // defined(CPUIDINFO_ARCH_ARM) @@ -471,16 +364,21 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const { } CPUIDInfo::CPUIDInfo() { -#ifdef CPUIDINFO_ARCH_X86 - X86Init(); -#elif defined(CPUIDINFO_ARCH_ARM) #if defined(CPUINFO_SUPPORTED) pytorch_cpuinfo_init_ = cpuinfo_initialize(); if (!pytorch_cpuinfo_init_) { - LOGS_DEFAULT(WARNING) << "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation " - "due to undetected CPU features."; + LogEarlyWarning( + "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation due to undetected CPU " + "features."); } #endif // defined(CPUINFO_SUPPORTED) + + // Note: This should be run after cpuinfo initialization if cpuinfo is enabled. 
+ VendorInfoInit(); + +#ifdef CPUIDINFO_ARCH_X86 + X86Init(); +#elif defined(CPUIDINFO_ARCH_ARM) #if defined(__linux__) ArmLinuxInit(); #elif defined(_WIN32) diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index 2fc7c449cd88e..d49eca7e1d60c 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -103,7 +103,40 @@ class CPUIDInfo { } private: + // Log function that uses ORT logging if available or writes to stderr. + // This enables us to log even before ORT logging has been initialized. + static void LogEarlyWarning(std::string_view message); + CPUIDInfo(); + + void VendorInfoInit(); + +#if defined(CPUIDINFO_ARCH_X86) + + void X86Init(); + +#elif defined(CPUIDINFO_ARCH_ARM) + +#if defined(__linux__) + + void ArmLinuxInit(); + +#elif defined(_WIN32) + + void ArmWindowsInit(); + +#elif defined(__APPLE__) + + void ArmAppleInit(); + +#endif + +#endif // defined(CPUIDINFO_ARCH_ARM) + +#if defined(CPUINFO_SUPPORTED) + bool pytorch_cpuinfo_init_{false}; +#endif // defined(CPUINFO_SUPPORTED) + bool has_amx_bf16_{false}; bool has_avx_{false}; bool has_avx2_{false}; @@ -132,39 +165,6 @@ class CPUIDInfo { std::string vendor_; uint32_t vendor_id_; - - uint32_t GetVendorId(const std::string& vendor); - -#if defined(CPUIDINFO_ARCH_X86) - - void X86Init(); - std::string GetX86Vendor(int32_t* data); - -#elif defined(CPUIDINFO_ARCH_ARM) - -#if defined(CPUINFO_SUPPORTED) - // Now the following var is only used in ARM build, but later on we may expand the usage. 
- bool pytorch_cpuinfo_init_{false}; -#endif // defined(CPUINFO_SUPPORTED) - -#if defined(__linux__) - - void ArmLinuxInit(); - std::string GetArmLinuxVendor(); - -#elif defined(_WIN32) - - void ArmWindowsInit(); - std::string GetArmWindowsVendor(); - -#elif defined(__APPLE__) - - void ArmAppleInit(); - std::string GetArmAppleVendor(); - -#endif - -#endif // defined(CPUIDINFO_ARCH_ARM) }; } // namespace onnxruntime diff --git a/onnxruntime/core/common/cpuid_info_vendor.cc b/onnxruntime/core/common/cpuid_info_vendor.cc new file mode 100644 index 0000000000000..9e08258c9875f --- /dev/null +++ b/onnxruntime/core/common/cpuid_info_vendor.cc @@ -0,0 +1,244 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/common/cpuid_info.h" + +#include +#include +#include + +#if defined(CPUINFO_SUPPORTED) +#include "cpuinfo.h" +#endif + +namespace { + +#if !defined(CPUINFO_SUPPORTED) + +// The `cpuinfo_vendor` enum is defined by the cpuinfo library. +// In case we don't build with cpuinfo, we define our own copy. +// The enum was copied from here: +// https://github.com/pytorch/cpuinfo/blob/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6/include/cpuinfo.h#L154-L307 + +/** Vendor of processor core design */ +enum cpuinfo_vendor { + /** Processor vendor is not known to the library, or the library failed + to get vendor information from the OS. */ + cpuinfo_vendor_unknown = 0, + + /* Active vendors of modern CPUs */ + + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor + * microarchitectures. + * + * Sold its ARM design subsidiary in 2006. The last ARM processor design + * was released in 2004. + */ + cpuinfo_vendor_intel = 1, + /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor + microarchitectures. */ + cpuinfo_vendor_amd = 2, + /** ARM Holdings plc. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_arm = 3, + /** Qualcomm Incorporated. 
Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_qualcomm = 4, + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_apple = 5, + /** Samsung Electronics Co., Ltd. Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_samsung = 6, + /** Nvidia Corporation. Vendor of ARM64-compatible processor + microarchitectures. */ + cpuinfo_vendor_nvidia = 7, + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_mips = 8, + /** International Business Machines Corporation. Vendor of PowerPC + processor microarchitectures. */ + cpuinfo_vendor_ibm = 9, + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_ingenic = 10, + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor + * microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA + * Technologies. + */ + cpuinfo_vendor_via = 11, + /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_cavium = 12, + /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_broadcom = 13, + /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_apm = 14, + /** + * Huawei Technologies Co., Ltd. Vendor of ARM64 processor + * microarchitectures. + * + * Processors are designed by HiSilicon, a subsidiary of Huawei. + */ + cpuinfo_vendor_huawei = 15, + /** + * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor + * of x86-64 processor microarchitectures. + * + * Processors are variants of AMD cores. + */ + cpuinfo_vendor_hygon = 16, + /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */ + cpuinfo_vendor_sifive = 17, + + /* Active vendors of embedded CPUs */ + + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. 
+ */ + cpuinfo_vendor_texas_instruments = 30, + /** Marvell Technology Group Ltd. Vendor of ARM processor + * microarchitectures. + */ + cpuinfo_vendor_marvell = 31, + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor + microarchitectures. */ + cpuinfo_vendor_rdc = 32, + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_dmp = 33, + /** Motorola, Inc. Vendor of PowerPC and ARM processor + microarchitectures. */ + cpuinfo_vendor_motorola = 34, + + /* Defunct CPU vendors */ + + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation + * to execute x86 code. + */ + cpuinfo_vendor_transmeta = 50, + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ + cpuinfo_vendor_cyrix = 51, + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ + cpuinfo_vendor_rise = 52, + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was + * released in 1998. + */ + cpuinfo_vendor_nsc = 53, + /** + * Silicon Integrated Systems. Vendor of x86 processor + * microarchitectures. + * + * Sold its x86 design subsidiary in 2001. The last processor design was + * released in 2001. + */ + cpuinfo_vendor_sis = 54, + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 + * instructions into simple microoperations. + */ + cpuinfo_vendor_nexgen = 55, + /** + * United Microelectronics Corporation. Vendor of x86 processor + * microarchitectures. 
+ * + * Ceased x86 in the early 1990s. The last processor design was released + * in 1991. Designed U5C and U5D processors. Both are 486 level. + */ + cpuinfo_vendor_umc = 56, + /** + * Digital Equipment Corporation. Vendor of ARM processor + * microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released + * in 1997. + */ + cpuinfo_vendor_dec = 57, +}; + +#endif + +} // namespace + +namespace onnxruntime { + +namespace { + +struct CpuVendorInfo { + cpuinfo_vendor vendor; + std::string_view name; + uint32_t id; +}; + +constexpr auto kUnknownCpuVendorInfo = CpuVendorInfo{cpuinfo_vendor_unknown, "unknown", 0x0000}; + +constexpr std::array kCpuVendorInfos{ + CpuVendorInfo{cpuinfo_vendor_amd, "AMD", 0x1022}, + CpuVendorInfo{cpuinfo_vendor_intel, "Intel", 0x8086}, + CpuVendorInfo{cpuinfo_vendor_qualcomm, "Qualcomm", uint32_t{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)}}, + CpuVendorInfo{cpuinfo_vendor_nvidia, "Nvidia", 0x10DE}, + CpuVendorInfo{cpuinfo_vendor_apple, "Apple", 0x106B}, + CpuVendorInfo{cpuinfo_vendor_arm, "ARM", 0x13B5}, + + // TODO add more as needed +}; + +constexpr const CpuVendorInfo* FindCpuVendorInfo(cpuinfo_vendor vendor) { + const auto vendor_mapping_it = std::find_if(kCpuVendorInfos.begin(), kCpuVendorInfos.end(), + [vendor](const CpuVendorInfo& entry) { + return entry.vendor == vendor; + }); + + if (vendor_mapping_it != kCpuVendorInfos.end()) { + return &*vendor_mapping_it; + } + + return nullptr; +} + +} // namespace + +void CPUIDInfo::VendorInfoInit() { + const cpuinfo_vendor vendor = [&]() { + cpuinfo_vendor result = cpuinfo_vendor_unknown; +#if defined(CPUINFO_SUPPORTED) + if (pytorch_cpuinfo_init_) { + const auto* processor = cpuinfo_get_processor(0); + if (processor && processor->core) { + result = processor->core->vendor; + } + } +#endif // defined(CPUINFO_SUPPORTED) + return result; + }(); + + const auto* vendor_info = FindCpuVendorInfo(vendor); + if (vendor_info == nullptr) { + 
LogEarlyWarning(MakeString("Unknown CPU vendor. cpuinfo_vendor value: ", static_cast(vendor))); + vendor_info = &kUnknownCpuVendorInfo; + } + + vendor_ = vendor_info->name; + vendor_id_ = vendor_info->id; +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/linux/cpuinfo.cc b/onnxruntime/core/platform/linux/cpuinfo.cc deleted file mode 100644 index 13160d201ca89..0000000000000 --- a/onnxruntime/core/platform/linux/cpuinfo.cc +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "core/platform/linux/cpuinfo.h" - -#include -#include -#include - -#include "core/common/string_utils.h" -#include "core/common/parse_string.h" - -namespace onnxruntime { - -namespace { -using KeyValuePairs = std::map>; - -bool TryGetValue(const KeyValuePairs& key_value_pairs, std::string_view key, std::string& value) { - auto it = key_value_pairs.find(key); - if (it == key_value_pairs.end()) { - return false; - } - - value = it->second; - return true; -} - -std::string ArmCpuImplementerIdToVendorName(uint32_t implementer_id) { - // ARM CPU implementer ids are copied from here: - // https://github.com/torvalds/linux/blob/038d61fd642278bab63ee8ef722c50d10ab01e8f/arch/arm64/include/asm/cputype.h#L54-L64 - // https://github.com/torvalds/linux/blob/038d61fd642278bab63ee8ef722c50d10ab01e8f/arch/arm/include/asm/cputype.h#L65-L68 - - switch (implementer_id) { - case 0x41: - return "ARM"; - case 0x42: - return "Broadcom"; - case 0x44: - return "DEC"; - case 0x43: - return "Cavium"; - case 0x46: - return "Fujitsu"; - case 0x48: - return "HiSilicon"; - case 0x4E: - return "Nvidia"; - case 0x50: - return "APM"; - case 0x51: - return "Qualcomm"; - case 0x61: - return "Apple"; - case 0x69: - return "Intel"; - case 0x6D: - return "Microsoft"; - case 0xC0: - return "Ampere"; - - default: - return "unknown"; - } -} -} // namespace - -Status ParseCpuInfoFile(const std::string& cpu_info_file, 
std::vector& cpu_infos_out) { - std::ifstream in{cpu_info_file}; - - ORT_RETURN_IF_NOT(in, "Failed to open file: ", cpu_info_file); - - std::vector cpu_infos{}; - KeyValuePairs key_value_pairs{}; - - auto add_processor_info = [&]() -> Status { - if (!key_value_pairs.empty()) { - CpuInfoFileProcessorInfo processor_info{}; - - { - std::string processor_str{}; - ORT_RETURN_IF_NOT(TryGetValue(key_value_pairs, "processor", processor_str), "Failed to get processor value."); - ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(processor_str, processor_info.processor)); - } - - // Try to get a vendor string. - if (std::string vendor_id; - TryGetValue(key_value_pairs, "vendor_id", vendor_id)) { - processor_info.vendor = std::move(vendor_id); - } else if (std::string implementer_id_str; - TryGetValue(key_value_pairs, "CPU implementer", implementer_id_str)) { - const auto implementer_id = ParseStringWithClassicLocale(implementer_id_str); - processor_info.vendor = ArmCpuImplementerIdToVendorName(implementer_id); - } else { - processor_info.vendor = "unknown"; - } - - cpu_infos.emplace_back(std::move(processor_info)); - - key_value_pairs.clear(); - } - return Status::OK(); - }; - - for (std::string line{}; std::getline(in, line);) { - line = utils::TrimString(line); - - if (line.empty()) { - ORT_RETURN_IF_ERROR(add_processor_info()); - continue; - } - - auto parts = utils::SplitString(line, ":"); - ORT_RETURN_IF_NOT(parts.size() == 2, "Unexpected format. 
Line: '", line, "'"); - - key_value_pairs.emplace(utils::TrimString(parts[0]), utils::TrimString(parts[1])); - } - - ORT_RETURN_IF_ERROR(add_processor_info()); - - cpu_infos_out = std::move(cpu_infos); - return Status::OK(); -} - -} // namespace onnxruntime diff --git a/onnxruntime/core/platform/linux/cpuinfo.h b/onnxruntime/core/platform/linux/cpuinfo.h deleted file mode 100644 index 1859d167b441a..0000000000000 --- a/onnxruntime/core/platform/linux/cpuinfo.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include - -#include "core/common/status.h" - -#include - -namespace onnxruntime { - -struct CpuInfoFileProcessorInfo { - size_t processor; - std::string vendor; - - // There are plenty of other fields. We can add more if needed. -}; - -Status ParseCpuInfoFile(const std::string& cpu_info_file, std::vector& cpu_infos); - -inline Status ParseCpuInfoFile(std::vector& cpu_info) { - return ParseCpuInfoFile("/proc/cpuinfo", cpu_info); -} - -} // namespace onnxruntime From 53dbbf959617e51c1c73c25758799de78a02aad9 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 30 Jul 2025 11:09:25 -0700 Subject: [PATCH 28/39] try enabling vendor id check in test --- onnxruntime/test/platform/device_discovery_test.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc index 80b24228b6d4f..0d78fe6e25909 100644 --- a/onnxruntime/test/platform/device_discovery_test.cc +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -24,11 +24,7 @@ std::vector GetDevicesByType(OrtHardwareDeviceType device_typ TEST(DeviceDiscoveryTest, HasCpuDevice) { const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); ASSERT_GT(cpu_devices.size(), 0); -#if defined(__linux__) && (defined(__aarch64__) || defined(__arm__)) - // TODO vendor_id is not 
properly set for ARM Linux yet -#else ASSERT_NE(cpu_devices[0].vendor_id, 0); -#endif } } // namespace onnxruntime::test From d9a7a5ab7fcd632ebfe038ec71d44d33773a9ebf Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:02:14 -0700 Subject: [PATCH 29/39] add endif comment, remove constexpr on FindCpuVendorInfo() because std::find_if is not necessarily constexpr. --- onnxruntime/core/common/cpuid_info_vendor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info_vendor.cc b/onnxruntime/core/common/cpuid_info_vendor.cc index 9e08258c9875f..d4d940eedfe28 100644 --- a/onnxruntime/core/common/cpuid_info_vendor.cc +++ b/onnxruntime/core/common/cpuid_info_vendor.cc @@ -175,7 +175,7 @@ enum cpuinfo_vendor { cpuinfo_vendor_dec = 57, }; -#endif +#endif // !defined(CPUINFO_SUPPORTED) } // namespace @@ -202,7 +202,7 @@ constexpr std::array kCpuVendorInfos{ // TODO add more as needed }; -constexpr const CpuVendorInfo* FindCpuVendorInfo(cpuinfo_vendor vendor) { +const CpuVendorInfo* FindCpuVendorInfo(cpuinfo_vendor vendor) { const auto vendor_mapping_it = std::find_if(kCpuVendorInfos.begin(), kCpuVendorInfos.end(), [vendor](const CpuVendorInfo& entry) { return entry.vendor == vendor; From 1ddd31a25230e7bb8b17bf787cc3aeaa18da90ed Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:39:00 -0700 Subject: [PATCH 30/39] disable DeviceDiscoveryTest.HasCpuDevice for WASM --- onnxruntime/test/platform/device_discovery_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc index 0d78fe6e25909..46e3926560761 100644 --- a/onnxruntime/test/platform/device_discovery_test.cc +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -22,6 +22,10 @@ std::vector GetDevicesByType(OrtHardwareDeviceType device_typ } // 
namespace TEST(DeviceDiscoveryTest, HasCpuDevice) { +#if defined(__wasm__) + GTEST_SKIP() << "CPU device discovery is not implemented for this platform."; +#endif // defined(__WASM__) + const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); ASSERT_GT(cpu_devices.size(), 0); ASSERT_NE(cpu_devices[0].vendor_id, 0); From e1e1014565abc551db655cb3ae158ef9cc3f3f7a Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:54:23 -0700 Subject: [PATCH 31/39] debugging - dump android logs if java test fails --- tools/ci_build/build.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index c8ef3e22b43f1..eb496b92ccca2 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1560,19 +1560,26 @@ def run_adb_shell(cmd): adb_shell(f"rm {device_dir}/onnxruntime_test_all") if args.build_java: - # use the gradle wrapper under /java - gradle_executable = os.path.join(source_dir, "java", "gradlew.bat" if is_windows() else "gradlew") - android_test_path = os.path.join(cwd, "java", "androidtest", "android") - run_subprocess( - [ - gradle_executable, - "--no-daemon", - f"-DminSdkVer={args.android_api}", - "clean", - "connectedDebugAndroidTest", - ], - cwd=android_test_path, - ) + try: + run_subprocess([sdk_tool_paths.adb, "logcat", "-c"]) + + # use the gradle wrapper under /java + gradle_executable = os.path.join(source_dir, "java", "gradlew.bat" if is_windows() else "gradlew") + android_test_path = os.path.join(cwd, "java", "androidtest", "android") + run_subprocess( + [ + gradle_executable, + "--no-daemon", + f"-DminSdkVer={args.android_api}", + "clean", + "connectedDebugAndroidTest", + ], + cwd=android_test_path, + ) + except: + log.warning("test failed, dumping android logs with logcat") + run_subprocess([sdk_tool_paths.adb, "logcat", "-d"]) + raise if args.use_nnapi: 
run_adb_shell(f"{device_dir}/onnx_test_runner -e nnapi {device_dir}/test") From 3ad0927dd4ea0240bec04dbb21a09ad5de27a958 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:30:49 -0700 Subject: [PATCH 32/39] disable gpu device discovery via sysfs on Android. there are permissions issues. --- .../core/platform/linux/device_discovery.cc | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 91538463b298c..803c8c61c682e 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -30,6 +30,8 @@ OrtHardwareDevice GetCpuDevice() { return cpu_device; } +#if !defined(__ANDROID__) + struct GpuSysfsPathInfo { size_t card_idx; fs::path path; @@ -63,12 +65,12 @@ std::vector DetectGpuSysfsPaths() { std::vector gpu_sysfs_paths{}; for (const auto& dir_item : fs::directory_iterator{sysfs_class_drm_path}) { - auto dir_item_path = dir_item.path(); + const auto& dir_item_path = dir_item.path(); if (size_t card_idx{}; detect_card_path(dir_item_path, card_idx)) { GpuSysfsPathInfo path_info{}; path_info.card_idx = card_idx; - path_info.path = std::move(dir_item_path); + path_info.path = dir_item_path; gpu_sysfs_paths.emplace_back(std::move(path_info)); } } @@ -90,7 +92,7 @@ ValueType ReadValueFromFile(const fs::path& file_path) { return ParseStringWithClassicLocale(file_text); } -OrtHardwareDevice GetGpuDevice(const GpuSysfsPathInfo& path_info) { +OrtHardwareDevice GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info) { OrtHardwareDevice gpu_device{}; const auto& sysfs_path = path_info.path; @@ -117,16 +119,26 @@ OrtHardwareDevice GetGpuDevice(const GpuSysfsPathInfo& path_info) { return gpu_device; } +#endif // !defined(__ANDROID__) + std::vector GetGpuDevices() { - const auto gpu_sysfs_path_infos = 
DetectGpuSysfsPaths(); std::vector gpu_devices{}; + +#if !defined(__ANDROID__) + + const auto gpu_sysfs_path_infos = DetectGpuSysfsPaths(); gpu_devices.reserve(gpu_sysfs_path_infos.size()); for (const auto& gpu_sysfs_path_info : gpu_sysfs_path_infos) { - auto gpu_device = GetGpuDevice(gpu_sysfs_path_info); + auto gpu_device = GetGpuDeviceFromSysfs(gpu_sysfs_path_info); gpu_devices.emplace_back(std::move(gpu_device)); } +#else // defined(__ANDROID__) + // In an Android app, we don't have permission to read sysfs. + // TODO detect GPU devices on Android +#endif // defined(__ANDROID__) + return gpu_devices; } From 6adf61f92aac2ccc13265faeecdc673b9d64fe79 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:31:25 -0700 Subject: [PATCH 33/39] format --- onnxruntime/core/platform/linux/device_discovery.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 803c8c61c682e..4c93ff32e6009 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -135,8 +135,10 @@ std::vector GetGpuDevices() { } #else // defined(__ANDROID__) + // In an Android app, we don't have permission to read sysfs. 
// TODO detect GPU devices on Android + #endif // defined(__ANDROID__) return gpu_devices; From 100228f506681eb960fcc69cb7e203ef7c756173 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:52:32 -0700 Subject: [PATCH 34/39] use ExitStack to dump Android logs --- tools/ci_build/build.py | 50 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index eb496b92ccca2..dd3e096c0334b 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1515,8 +1515,8 @@ def adb_push(src, dest, **kwargs): def adb_shell(*args, **kwargs): return run_subprocess([sdk_tool_paths.adb, "shell", *args], **kwargs) - def adb_install(*args, **kwargs): - return run_subprocess([sdk_tool_paths.adb, "install", *args], **kwargs) + def adb_logcat(*args, **kwargs): + return run_subprocess([sdk_tool_paths.adb, "logcat", *args], **kwargs) def run_adb_shell(cmd): # GCOV_PREFIX_STRIP specifies the depth of the directory hierarchy to strip and @@ -1542,6 +1542,17 @@ def run_adb_shell(cmd): ) context_stack.callback(android.stop_emulator, emulator_proc) + all_android_tests_passed = False + + def dump_logs_on_failure(): + if not all_android_tests_passed: + log.warning("Android test failed. 
Dumping logs.") + adb_logcat("-d") # dump logs + + context_stack.callback(dump_logs_on_failure) + + adb_logcat("-c") # clear logs + adb_push("testdata", device_dir, cwd=cwd) if is_linux() and os.path.exists("/data/onnx"): adb_push("/data/onnx", device_dir + "/test", cwd=cwd) @@ -1560,26 +1571,19 @@ def run_adb_shell(cmd): adb_shell(f"rm {device_dir}/onnxruntime_test_all") if args.build_java: - try: - run_subprocess([sdk_tool_paths.adb, "logcat", "-c"]) - - # use the gradle wrapper under /java - gradle_executable = os.path.join(source_dir, "java", "gradlew.bat" if is_windows() else "gradlew") - android_test_path = os.path.join(cwd, "java", "androidtest", "android") - run_subprocess( - [ - gradle_executable, - "--no-daemon", - f"-DminSdkVer={args.android_api}", - "clean", - "connectedDebugAndroidTest", - ], - cwd=android_test_path, - ) - except: - log.warning("test failed, dumping android logs with logcat") - run_subprocess([sdk_tool_paths.adb, "logcat", "-d"]) - raise + # use the gradle wrapper under /java + gradle_executable = os.path.join(source_dir, "java", "gradlew.bat" if is_windows() else "gradlew") + android_test_path = os.path.join(cwd, "java", "androidtest", "android") + run_subprocess( + [ + gradle_executable, + "--no-daemon", + f"-DminSdkVer={args.android_api}", + "clean", + "connectedDebugAndroidTest", + ], + cwd=android_test_path, + ) if args.use_nnapi: run_adb_shell(f"{device_dir}/onnx_test_runner -e nnapi {device_dir}/test") @@ -1600,6 +1604,8 @@ def run_adb_shell(cmd): f"LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{device_dir} {device_dir}/onnxruntime_customopregistration_test" ) + all_android_tests_passed = True + def run_ios_tests(args, source_dir, config, cwd): is_targeting_iphone_simulator = "iphonesimulator" in args.apple_sysroot.lower() From 1afaa3eda5afd4d100310032e1e5f6baca78e42d Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 18:56:36 -0700 Subject: [PATCH 35/39] add get CPU device helper 
function, remove ANDROID handling from linux/device_discovery.cc, add CPU device to default impl. --- cmake/onnxruntime_common.cmake | 2 +- .../core/platform/apple/device_discovery.cc | 15 +---------- onnxruntime/core/platform/device_discovery.h | 4 +++ .../core/platform/device_discovery_common.cc | 13 +++++++++ .../core/platform/device_discovery_default.cc | 10 +++++-- .../core/platform/linux/device_discovery.cc | 27 +------------------ .../test/platform/device_discovery_test.cc | 7 +++-- 7 files changed, 31 insertions(+), 47 deletions(-) diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 4b312b07e186e..d927489372e7c 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -82,7 +82,7 @@ endif() if (WIN32) list(APPEND onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/windows/device_discovery.cc") -elseif (LINUX OR ANDROID) +elseif (LINUX) list(APPEND onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/linux/device_discovery.cc") elseif (APPLE) diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc index e41abd9143afe..767b834e38756 100644 --- a/onnxruntime/core/platform/apple/device_discovery.cc +++ b/onnxruntime/core/platform/apple/device_discovery.cc @@ -6,7 +6,6 @@ #include #include -#include "core/common/cpuid_info.h" #include "core/common/logging/logging.h" namespace onnxruntime { @@ -16,18 +15,6 @@ namespace { constexpr auto kApplePciVendorId = 0x106B; constexpr auto kAppleVendorName = "Apple"; -OrtHardwareDevice GetCpuDevice() { - const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); - - OrtHardwareDevice cpu_device{}; - cpu_device.vendor = cpuid_info.GetCPUVendor(); - cpu_device.vendor_id = cpuid_info.GetCPUVendorId(); - cpu_device.device_id = 0; - cpu_device.type = OrtHardwareDeviceType_CPU; - - return cpu_device; -} - std::vector GetGpuDevices() { std::vector result{}; @@ -100,7 +87,7 @@ 
std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor std::unordered_set devices; // get CPU devices - devices.insert(GetCpuDevice()); + devices.insert(GetCpuDeviceFromCPUIDInfo()); // get GPU devices { diff --git a/onnxruntime/core/platform/device_discovery.h b/onnxruntime/core/platform/device_discovery.h index 1c32c7a29de14..b49e63b90236a 100644 --- a/onnxruntime/core/platform/device_discovery.h +++ b/onnxruntime/core/platform/device_discovery.h @@ -18,5 +18,9 @@ class DeviceDiscovery { // platform specific code implements this method static std::unordered_set DiscoverDevicesForPlatform(); + + // Gets a CPU device by querying `CPUIDInfo`. + static OrtHardwareDevice GetCpuDeviceFromCPUIDInfo(); }; + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/device_discovery_common.cc b/onnxruntime/core/platform/device_discovery_common.cc index 2adc556899db3..dcba31aed6fec 100644 --- a/onnxruntime/core/platform/device_discovery_common.cc +++ b/onnxruntime/core/platform/device_discovery_common.cc @@ -7,6 +7,7 @@ #include +#include "core/common/cpuid_info.h" #include "core/common/logging/logging.h" namespace onnxruntime { @@ -39,4 +40,16 @@ const std::unordered_set& DeviceDiscovery::GetDevices() { return devices; } +OrtHardwareDevice DeviceDiscovery::GetCpuDeviceFromCPUIDInfo() { + const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); + + OrtHardwareDevice cpu_device{}; + cpu_device.vendor = cpuid_info.GetCPUVendor(); + cpu_device.vendor_id = cpuid_info.GetCPUVendorId(); + cpu_device.device_id = 0; + cpu_device.type = OrtHardwareDeviceType_CPU; + + return cpu_device; +} + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/device_discovery_default.cc b/onnxruntime/core/platform/device_discovery_default.cc index 62d7f8f2a77b8..73ddf516034ab 100644 --- a/onnxruntime/core/platform/device_discovery_default.cc +++ b/onnxruntime/core/platform/device_discovery_default.cc @@ -6,8 +6,14 @@ namespace onnxruntime { std::unordered_set 
DeviceDiscovery::DiscoverDevicesForPlatform() { - // This is a default implementation which does not try to discover anything. - return {}; + // This is a default implementation. + // We assume that there is a CPU device and do not attempt to discover anything else. + + std::unordered_set devices{}; + + devices.emplace(GetCpuDeviceFromCPUIDInfo()); + + return devices; } } // namespace onnxruntime diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index 4c93ff32e6009..dad2b0ee75f5a 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -9,7 +9,6 @@ #include #include "core/common/common.h" -#include "core/common/cpuid_info.h" #include "core/common/parse_string.h" #include "core/common/string_utils.h" @@ -18,19 +17,6 @@ namespace fs = std::filesystem; namespace onnxruntime { namespace { -OrtHardwareDevice GetCpuDevice() { - const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); - - OrtHardwareDevice cpu_device{}; - cpu_device.vendor = cpuid_info.GetCPUVendor(); - cpu_device.vendor_id = cpuid_info.GetCPUVendorId(); - cpu_device.device_id = 0; - cpu_device.type = OrtHardwareDeviceType_CPU; - - return cpu_device; -} - -#if !defined(__ANDROID__) struct GpuSysfsPathInfo { size_t card_idx; @@ -119,13 +105,9 @@ OrtHardwareDevice GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info) { return gpu_device; } -#endif // !defined(__ANDROID__) - std::vector GetGpuDevices() { std::vector gpu_devices{}; -#if !defined(__ANDROID__) - const auto gpu_sysfs_path_infos = DetectGpuSysfsPaths(); gpu_devices.reserve(gpu_sysfs_path_infos.size()); @@ -134,13 +116,6 @@ std::vector GetGpuDevices() { gpu_devices.emplace_back(std::move(gpu_device)); } -#else // defined(__ANDROID__) - - // In an Android app, we don't have permission to read sysfs. 
- // TODO detect GPU devices on Android - -#endif // defined(__ANDROID__) - return gpu_devices; } @@ -150,7 +125,7 @@ std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor std::unordered_set devices; // get CPU devices - devices.emplace(GetCpuDevice()); + devices.emplace(GetCpuDeviceFromCPUIDInfo()); // get GPU devices { diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc index 46e3926560761..21ddf9a5b1cd7 100644 --- a/onnxruntime/test/platform/device_discovery_test.cc +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -22,13 +22,12 @@ std::vector GetDevicesByType(OrtHardwareDeviceType device_typ } // namespace TEST(DeviceDiscoveryTest, HasCpuDevice) { -#if defined(__wasm__) - GTEST_SKIP() << "CPU device discovery is not implemented for this platform."; -#endif // defined(__WASM__) - const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); ASSERT_GT(cpu_devices.size(), 0); + +#if !defined(__wasm__) ASSERT_NE(cpu_devices[0].vendor_id, 0); +#endif // !defined(__wasm__) } } // namespace onnxruntime::test From cdca7a9aa0798aaf02f14b725da71e09dd3e0211 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 19:02:24 -0700 Subject: [PATCH 36/39] remove TODO about extending hex parsing to other types. largely hypothetical now. 
--- include/onnxruntime/core/common/parse_string.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/onnxruntime/core/common/parse_string.h b/include/onnxruntime/core/common/parse_string.h index 1a9580184465b..e6df6672cf519 100644 --- a/include/onnxruntime/core/common/parse_string.h +++ b/include/onnxruntime/core/common/parse_string.h @@ -39,7 +39,6 @@ TryParseStringWithClassicLocale(std::string_view str, T& value) { std::from_chars_result conversion_result{}; if constexpr (std::is_integral_v && std::is_unsigned_v) { // For unsigned integral types, also handle hex values, i.e., those beginning with "0x". - // TODO We could also extend this to other types. For that, we would need to handle negative values. const bool has_hex_prefix = str.size() >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'); From 35b3796158a42f1ecd75590e0f1a2cc52d0ed083 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 14 Aug 2025 19:05:18 -0700 Subject: [PATCH 37/39] remove commented out test --- onnxruntime/test/common/string_utils_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/common/string_utils_test.cc b/onnxruntime/test/common/string_utils_test.cc index 62925d68cb604..983f7fa7a87f9 100644 --- a/onnxruntime/test/common/string_utils_test.cc +++ b/onnxruntime/test/common/string_utils_test.cc @@ -35,7 +35,6 @@ TEST(StringUtilsTest, TryParseStringWithClassicLocale) { TestSuccessfulParse("-1", -1); TestSuccessfulParse("42", 42u); TestSuccessfulParse("2.5", 2.5f); - // TestSuccessfulParse("0x2.8", 2.5f); // we don't handle floating point hex yet TestSuccessfulParse("0x100", uint32_t{0x100}); // out of range From 30370bc5bcff986e0e5e94d2e675ab03f5a8f242 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 15 Aug 2025 08:24:33 -0700 Subject: [PATCH 38/39] add comment about std::from_chars not accepting 0x prefix --- include/onnxruntime/core/common/parse_string.h | 1 + 1
file changed, 1 insertion(+) diff --git a/include/onnxruntime/core/common/parse_string.h b/include/onnxruntime/core/common/parse_string.h index e6df6672cf519..5f88d490b3415 100644 --- a/include/onnxruntime/core/common/parse_string.h +++ b/include/onnxruntime/core/common/parse_string.h @@ -39,6 +39,7 @@ TryParseStringWithClassicLocale(std::string_view str, T& value) { std::from_chars_result conversion_result{}; if constexpr (std::is_integral_v && std::is_unsigned_v) { // For unsigned integral types, also handle hex values, i.e., those beginning with "0x". + // std::from_chars() does not accept the "0x" prefix. const bool has_hex_prefix = str.size() >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'); From c3138a7603aca0c40e171adcf2c16c25c8e0e596 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 18 Aug 2025 16:38:06 -0700 Subject: [PATCH 39/39] linux - log GPU discovery error instead of throwing exception --- .../core/platform/linux/device_discovery.cc | 79 +++++++++++++------ 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc index dad2b0ee75f5a..6a02a1b46028f 100644 --- a/onnxruntime/core/platform/linux/device_discovery.cc +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -9,6 +9,7 @@ #include #include "core/common/common.h" +#include "core/common/logging/logging.h" #include "core/common/parse_string.h" #include "core/common/string_utils.h" @@ -18,16 +19,30 @@ namespace onnxruntime { namespace { +Status ErrorCodeToStatus(const std::error_code& ec) { + if (!ec) { + return Status::OK(); + } + + return Status{common::StatusCategory::ONNXRUNTIME, common::StatusCode::FAIL, + MakeString("Error: std::error_code with category name: ", ec.category().name(), + ", value: ", ec.value(), ", message: ", ec.message())}; +} + struct GpuSysfsPathInfo { size_t card_idx; fs::path path; }; -std::vector 
DetectGpuSysfsPaths() { +Status DetectGpuSysfsPaths(std::vector& gpu_sysfs_paths_out) { + std::error_code error_code{}; const fs::path sysfs_class_drm_path = "/sys/class/drm"; + const bool sysfs_class_drm_path_exists = fs::exists(sysfs_class_drm_path, error_code); + ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code)); - if (!fs::exists(sysfs_class_drm_path)) { - return {}; + if (!sysfs_class_drm_path_exists) { + gpu_sysfs_paths_out = std::vector{}; + return Status::OK(); } const auto detect_card_path = [](const fs::path& sysfs_path, size_t& card_idx) -> bool { @@ -50,7 +65,11 @@ std::vector DetectGpuSysfsPaths() { }; std::vector gpu_sysfs_paths{}; - for (const auto& dir_item : fs::directory_iterator{sysfs_class_drm_path}) { + + auto dir_iterator = fs::directory_iterator{sysfs_class_drm_path, error_code}; + ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code)); + + for (const auto& dir_item : dir_iterator) { const auto& dir_item_path = dir_item.path(); if (size_t card_idx{}; detect_card_path(dir_item_path, card_idx)) { @@ -61,31 +80,34 @@ std::vector DetectGpuSysfsPaths() { } } - return gpu_sysfs_paths; + gpu_sysfs_paths_out = std::move(gpu_sysfs_paths); + return Status::OK(); } -std::string ReadFileContents(const fs::path& file_path) { +Status ReadFileContents(const fs::path& file_path, std::string& contents) { std::ifstream file{file_path}; - ORT_ENFORCE(file, "Failed to open file: ", file_path); + ORT_RETURN_IF_NOT(file, "Failed to open file: ", file_path); std::istreambuf_iterator file_begin{file}, file_end{}; - std::string contents(file_begin, file_end); - return contents; + contents.assign(file_begin, file_end); + return Status::OK(); } template -ValueType ReadValueFromFile(const fs::path& file_path) { - const auto file_text = utils::TrimString(ReadFileContents(file_path)); - return ParseStringWithClassicLocale(file_text); +Status ReadValueFromFile(const fs::path& file_path, ValueType& value) { + std::string file_text{}; + 
ORT_RETURN_IF_ERROR(ReadFileContents(file_path, file_text)); + file_text = utils::TrimString(file_text); + return ParseStringWithClassicLocale(file_text, value); } -OrtHardwareDevice GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info) { +Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevice& gpu_device_out) { OrtHardwareDevice gpu_device{}; const auto& sysfs_path = path_info.path; // vendor id { const auto vendor_id_path = sysfs_path / "device" / "vendor"; - gpu_device.vendor_id = ReadValueFromFile(vendor_id_path); + ORT_RETURN_IF_ERROR(ReadValueFromFile(vendor_id_path, gpu_device.vendor_id)); } // TODO vendor name @@ -93,7 +115,7 @@ OrtHardwareDevice GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info) { // device id { const auto device_id_path = sysfs_path / "device" / "device"; - gpu_device.device_id = ReadValueFromFile(device_id_path); + ORT_RETURN_IF_ERROR(ReadValueFromFile(device_id_path, gpu_device.device_id)); } // metadata @@ -102,21 +124,25 @@ OrtHardwareDevice GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info) { gpu_device.type = OrtHardwareDeviceType_GPU; - return gpu_device; + gpu_device_out = std::move(gpu_device); + return Status::OK(); } -std::vector GetGpuDevices() { - std::vector gpu_devices{}; +Status GetGpuDevices(std::vector& gpu_devices_out) { + std::vector gpu_sysfs_path_infos{}; + ORT_RETURN_IF_ERROR(DetectGpuSysfsPaths(gpu_sysfs_path_infos)); - const auto gpu_sysfs_path_infos = DetectGpuSysfsPaths(); + std::vector gpu_devices{}; gpu_devices.reserve(gpu_sysfs_path_infos.size()); for (const auto& gpu_sysfs_path_info : gpu_sysfs_path_infos) { - auto gpu_device = GetGpuDeviceFromSysfs(gpu_sysfs_path_info); + OrtHardwareDevice gpu_device{}; + ORT_RETURN_IF_ERROR(GetGpuDeviceFromSysfs(gpu_sysfs_path_info, gpu_device)); gpu_devices.emplace_back(std::move(gpu_device)); } - return gpu_devices; + gpu_devices_out = std::move(gpu_devices); + return Status::OK(); } } // namespace @@ -129,9 +155,14 @@ 
std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor // get GPU devices { - auto gpu_devices = GetGpuDevices(); - devices.insert(std::make_move_iterator(gpu_devices.begin()), - std::make_move_iterator(gpu_devices.end())); + std::vector gpu_devices{}; + Status gpu_device_discovery_status = GetGpuDevices(gpu_devices); + if (gpu_device_discovery_status.IsOK()) { + devices.insert(std::make_move_iterator(gpu_devices.begin()), + std::make_move_iterator(gpu_devices.end())); + } else { + LOGS_DEFAULT(WARNING) << "GPU device discovery failed: " << gpu_device_discovery_status.ErrorMessage(); + } } // get NPU devices