diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 5dcc2b2628bf4..d927489372e7c 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -14,7 +14,7 @@ set(onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/check_intel.h" "${ONNXRUNTIME_ROOT}/core/platform/check_intel.cc" "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.h" - "${ONNXRUNTIME_ROOT}/core/platform/device_discovery.cc" + "${ONNXRUNTIME_ROOT}/core/platform/device_discovery_common.cc" "${ONNXRUNTIME_ROOT}/core/platform/env.h" "${ONNXRUNTIME_ROOT}/core/platform/env.cc" "${ONNXRUNTIME_ROOT}/core/platform/env_time.h" @@ -32,18 +32,30 @@ set(onnxruntime_common_src_patterns if(WIN32) list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/windows/*.h" - "${ONNXRUNTIME_ROOT}/core/platform/windows/*.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/dll_load_error.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/env_time.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/env.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/env.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/hardware_core_enumerator.h" + "${ONNXRUNTIME_ROOT}/core/platform/windows/stacktrace.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.cc" + "${ONNXRUNTIME_ROOT}/core/platform/windows/telemetry.h" "${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.h" "${ONNXRUNTIME_ROOT}/core/platform/windows/logging/*.cc" ) else() list(APPEND onnxruntime_common_src_patterns - "${ONNXRUNTIME_ROOT}/core/platform/posix/*.h" - "${ONNXRUNTIME_ROOT}/core/platform/posix/*.cc" + "${ONNXRUNTIME_ROOT}/core/platform/posix/env_time.cc" + "${ONNXRUNTIME_ROOT}/core/platform/posix/env.cc" + 
"${ONNXRUNTIME_ROOT}/core/platform/posix/stacktrace.cc" ) + # logging files if (onnxruntime_USE_SYSLOG) list(APPEND onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/posix/logging/*.h" @@ -51,7 +63,7 @@ else() ) endif() - if (CMAKE_SYSTEM_NAME STREQUAL "Android") + if (ANDROID) list(APPEND onnxruntime_common_src_patterns "${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.h" "${ONNXRUNTIME_ROOT}/core/platform/android/logging/*.cc" @@ -66,6 +78,21 @@ else() endif() endif() +# platform-specific device discovery files +if (WIN32) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/windows/device_discovery.cc") +elseif (LINUX) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/linux/device_discovery.cc") +elseif (APPLE) + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/apple/device_discovery.cc") +else() + list(APPEND onnxruntime_common_src_patterns + "${ONNXRUNTIME_ROOT}/core/platform/device_discovery_default.cc") +endif() + if(onnxruntime_target_platform STREQUAL "ARM64EC") if (MSVC) link_directories("$ENV{VCINSTALLDIR}/Tools/MSVC/$ENV{VCToolsVersion}/lib/ARM64EC") @@ -216,8 +243,6 @@ endif() if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64) # Link cpuinfo if supported - # Using it mainly in ARM with Android. - # Its functionality in detecting x86 cpu features are lacking, so is support for Windows. 
if (CPUINFO_SUPPORTED) onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo) list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME}) diff --git a/include/onnxruntime/core/common/parse_string.h b/include/onnxruntime/core/common/parse_string.h index 6345b2a55490d..5f88d490b3415 100644 --- a/include/onnxruntime/core/common/parse_string.h +++ b/include/onnxruntime/core/common/parse_string.h @@ -35,13 +35,30 @@ template std::enable_if_t, bool> TryParseStringWithClassicLocale(std::string_view str, T& value) { T parsed_value{}; - const auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(), parsed_value); - if (ec != std::errc{}) { + std::from_chars_result conversion_result{}; + if constexpr (std::is_integral_v && std::is_unsigned_v) { + // For unsigned integral types, also handle hex values, i.e., those beginning with "0x". + // std::from_chars() does not accept the "0x" prefix. + const bool has_hex_prefix = str.size() >= 2 && + str[0] == '0' && + (str[1] == 'x' || str[1] == 'X'); + + if (has_hex_prefix) { + str = str.substr(2); + } + + const int base = has_hex_prefix ? 16 : 10; + conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value, base); + } else { + conversion_result = std::from_chars(str.data(), str.data() + str.size(), parsed_value); + } + + if (conversion_result.ec != std::errc{}) { return false; } - if (ptr != str.data() + str.size()) { + if (conversion_result.ptr != str.data() + str.size()) { return false; } diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index dccfdbda8971b..6c66047b4b36a 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -1,6 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "core/common/cpuid_info.h" + +#include +#include + #include "core/common/logging/logging.h" #include "core/common/logging/severity.h" #include "core/platform/check_intel.h" @@ -51,6 +55,14 @@ #endif // _WIN32 +#if defined(__APPLE__) +#if defined(CPUIDINFO_ARCH_ARM) + +#include + +#endif // defined(CPUIDINFO_ARCH_ARM) +#endif // defined(__APPLE__) + #if defined(CPUINFO_SUPPORTED) #include #if defined(CPUIDINFO_ARCH_ARM) @@ -74,6 +86,14 @@ void decodeMIDR(uint32_t midr, uint32_t uarch[1]); namespace onnxruntime { +void CPUIDInfo::LogEarlyWarning(std::string_view message) { + if (logging::LoggingManager::HasDefaultLogger()) { + LOGS_DEFAULT(WARNING) << message; + } else { + std::cerr << "onnxruntime cpuid_info warning: " << message << "\n"; + } +} + #if defined(CPUIDINFO_ARCH_X86) static inline void GetCPUID(int function_id, int data[4]) { // NOLINT @@ -108,9 +128,6 @@ void CPUIDInfo::X86Init() { int data[4] = {-1}; GetCPUID(0, data); - vendor_ = GetX86Vendor(data); - vendor_id_ = GetVendorId(vendor_); - int num_IDs = data[0]; if (num_IDs >= 1) { GetCPUID(1, data); @@ -158,24 +175,8 @@ void CPUIDInfo::X86Init() { } } -std::string CPUIDInfo::GetX86Vendor(int32_t* data) { - char vendor[sizeof(int32_t) * 3 + 1]{}; - *reinterpret_cast(vendor + 0) = data[1]; - *reinterpret_cast(vendor + 4) = data[3]; - *reinterpret_cast(vendor + 8) = data[2]; - return vendor; -} - #endif // defined(CPUIDINFO_ARCH_X86) -uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { - if (vendor == "GenuineIntel") return 0x8086; - if (vendor == "AuthenticAMD") return 0x1022; - if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24); - if (vendor.find("NV") == 0) return 0x10DE; - return 0; -} - #if defined(CPUIDINFO_ARCH_ARM) #if defined(__linux__) @@ -228,10 +229,6 @@ void CPUIDInfo::ArmLinuxInit() { #elif defined(_WIN32) // ^ defined(__linux__) void CPUIDInfo::ArmWindowsInit() { - // Get the ARM vendor string from the registry - vendor_ = 
GetArmWindowsVendor(); - vendor_id_ = GetVendorId(vendor_); - // Read MIDR and ID_AA64ISAR1_EL1 register values from Windows registry // There should be one per CPU std::vector midr_values{}, id_aa64isar1_el1_values{}; @@ -323,15 +320,6 @@ void CPUIDInfo::ArmWindowsInit() { #endif // defined(CPUINFO_SUPPORTED) } -std::string CPUIDInfo::GetArmWindowsVendor() { - const int MAX_VALUE_NAME = 256; - const CHAR vendorKey[] = "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; - CHAR vendorVal[MAX_VALUE_NAME] = ""; - unsigned long vendorSize = sizeof(char) * MAX_VALUE_NAME; - ::RegGetValueA(HKEY_LOCAL_MACHINE, vendorKey, "Vendor Identifier", RRF_RT_REG_SZ | RRF_ZEROONFAILURE, nullptr, &vendorVal, &vendorSize); - return vendorVal; -} - #elif defined(__APPLE__) // ^ defined(_WIN32) void CPUIDInfo::ArmAppleInit() { @@ -376,16 +364,21 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const { } CPUIDInfo::CPUIDInfo() { -#ifdef CPUIDINFO_ARCH_X86 - X86Init(); -#elif defined(CPUIDINFO_ARCH_ARM) #if defined(CPUINFO_SUPPORTED) pytorch_cpuinfo_init_ = cpuinfo_initialize(); if (!pytorch_cpuinfo_init_) { - LOGS_DEFAULT(WARNING) << "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation " - "due to undetected CPU features."; + LogEarlyWarning( + "Failed to initialize PyTorch cpuinfo library. May cause CPU EP performance degradation due to undetected CPU " + "features."); } #endif // defined(CPUINFO_SUPPORTED) + + // Note: This should be run after cpuinfo initialization if cpuinfo is enabled. 
+ VendorInfoInit(); + +#ifdef CPUIDINFO_ARCH_X86 + X86Init(); +#elif defined(CPUIDINFO_ARCH_ARM) #if defined(__linux__) ArmLinuxInit(); #elif defined(_WIN32) diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index 84571fa12e6ea..d49eca7e1d60c 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -103,7 +103,40 @@ class CPUIDInfo { } private: + // Log function that uses ORT logging if available or writes to stderr. + // This enables us to log even before ORT logging has been initialized. + static void LogEarlyWarning(std::string_view message); + CPUIDInfo(); + + void VendorInfoInit(); + +#if defined(CPUIDINFO_ARCH_X86) + + void X86Init(); + +#elif defined(CPUIDINFO_ARCH_ARM) + +#if defined(__linux__) + + void ArmLinuxInit(); + +#elif defined(_WIN32) + + void ArmWindowsInit(); + +#elif defined(__APPLE__) + + void ArmAppleInit(); + +#endif + +#endif // defined(CPUIDINFO_ARCH_ARM) + +#if defined(CPUINFO_SUPPORTED) + bool pytorch_cpuinfo_init_{false}; +#endif // defined(CPUINFO_SUPPORTED) + bool has_amx_bf16_{false}; bool has_avx_{false}; bool has_avx2_{false}; @@ -132,37 +165,6 @@ class CPUIDInfo { std::string vendor_; uint32_t vendor_id_; - - uint32_t GetVendorId(const std::string& vendor); - -#if defined(CPUIDINFO_ARCH_X86) - - void X86Init(); - std::string GetX86Vendor(int32_t* data); - -#elif defined(CPUIDINFO_ARCH_ARM) - -#if defined(CPUINFO_SUPPORTED) - // Now the following var is only used in ARM build, but later on we may expand the usage. 
- bool pytorch_cpuinfo_init_{false}; -#endif // defined(CPUINFO_SUPPORTED) - -#if defined(__linux__) - - void ArmLinuxInit(); - -#elif defined(_WIN32) - - void ArmWindowsInit(); - std::string GetArmWindowsVendor(); - -#elif defined(__APPLE__) - - void ArmAppleInit(); - -#endif - -#endif // defined(CPUIDINFO_ARCH_ARM) }; } // namespace onnxruntime diff --git a/onnxruntime/core/common/cpuid_info_vendor.cc b/onnxruntime/core/common/cpuid_info_vendor.cc new file mode 100644 index 0000000000000..d4d940eedfe28 --- /dev/null +++ b/onnxruntime/core/common/cpuid_info_vendor.cc @@ -0,0 +1,244 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/common/cpuid_info.h" + +#include +#include +#include + +#if defined(CPUINFO_SUPPORTED) +#include "cpuinfo.h" +#endif + +namespace { + +#if !defined(CPUINFO_SUPPORTED) + +// The `cpuinfo_vendor` enum is defined by the cpuinfo library. +// In case we don't build with cpuinfo, we define our own copy. +// The enum was copied from here: +// https://github.com/pytorch/cpuinfo/blob/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6/include/cpuinfo.h#L154-L307 + +/** Vendor of processor core design */ +enum cpuinfo_vendor { + /** Processor vendor is not known to the library, or the library failed + to get vendor information from the OS. */ + cpuinfo_vendor_unknown = 0, + + /* Active vendors of modern CPUs */ + + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor + * microarchitectures. + * + * Sold its ARM design subsidiary in 2006. The last ARM processor design + * was released in 2004. + */ + cpuinfo_vendor_intel = 1, + /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor + microarchitectures. */ + cpuinfo_vendor_amd = 2, + /** ARM Holdings plc. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_arm = 3, + /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor + microarchitectures. 
*/ + cpuinfo_vendor_qualcomm = 4, + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_apple = 5, + /** Samsung Electronics Co., Ltd. Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_samsung = 6, + /** Nvidia Corporation. Vendor of ARM64-compatible processor + microarchitectures. */ + cpuinfo_vendor_nvidia = 7, + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_mips = 8, + /** International Business Machines Corporation. Vendor of PowerPC + processor microarchitectures. */ + cpuinfo_vendor_ibm = 9, + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_ingenic = 10, + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor + * microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA + * Technologies. + */ + cpuinfo_vendor_via = 11, + /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_cavium = 12, + /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_broadcom = 13, + /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_apm = 14, + /** + * Huawei Technologies Co., Ltd. Vendor of ARM64 processor + * microarchitectures. + * + * Processors are designed by HiSilicon, a subsidiary of Huawei. + */ + cpuinfo_vendor_huawei = 15, + /** + * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor + * of x86-64 processor microarchitectures. + * + * Processors are variants of AMD cores. + */ + cpuinfo_vendor_hygon = 16, + /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */ + cpuinfo_vendor_sifive = 17, + + /* Active vendors of embedded CPUs */ + + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. + */ + cpuinfo_vendor_texas_instruments = 30, + /** Marvell Technology Group Ltd. 
Vendor of ARM processor + * microarchitectures. + */ + cpuinfo_vendor_marvell = 31, + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor + microarchitectures. */ + cpuinfo_vendor_rdc = 32, + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_dmp = 33, + /** Motorola, Inc. Vendor of PowerPC and ARM processor + microarchitectures. */ + cpuinfo_vendor_motorola = 34, + + /* Defunct CPU vendors */ + + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation + * to execute x86 code. + */ + cpuinfo_vendor_transmeta = 50, + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ + cpuinfo_vendor_cyrix = 51, + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ + cpuinfo_vendor_rise = 52, + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was + * released in 1998. + */ + cpuinfo_vendor_nsc = 53, + /** + * Silicon Integrated Systems. Vendor of x86 processor + * microarchitectures. + * + * Sold its x86 design subsidiary in 2001. The last processor design was + * released in 2001. + */ + cpuinfo_vendor_sis = 54, + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 + * instructions into simple microoperations. + */ + cpuinfo_vendor_nexgen = 55, + /** + * United Microelectronics Corporation. Vendor of x86 processor + * microarchitectures. + * + * Ceased x86 in the early 1990s. The last processor design was released + * in 1991. 
Designed U5C and U5D processors. Both are 486 level. + */ + cpuinfo_vendor_umc = 56, + /** + * Digital Equipment Corporation. Vendor of ARM processor + * microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released + * in 1997. + */ + cpuinfo_vendor_dec = 57, +}; + +#endif // !defined(CPUINFO_SUPPORTED) + +} // namespace + +namespace onnxruntime { + +namespace { + +struct CpuVendorInfo { + cpuinfo_vendor vendor; + std::string_view name; + uint32_t id; +}; + +constexpr auto kUnknownCpuVendorInfo = CpuVendorInfo{cpuinfo_vendor_unknown, "unknown", 0x0000}; + +constexpr std::array kCpuVendorInfos{ + CpuVendorInfo{cpuinfo_vendor_amd, "AMD", 0x1022}, + CpuVendorInfo{cpuinfo_vendor_intel, "Intel", 0x8086}, + CpuVendorInfo{cpuinfo_vendor_qualcomm, "Qualcomm", uint32_t{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)}}, + CpuVendorInfo{cpuinfo_vendor_nvidia, "Nvidia", 0x10DE}, + CpuVendorInfo{cpuinfo_vendor_apple, "Apple", 0x106B}, + CpuVendorInfo{cpuinfo_vendor_arm, "ARM", 0x13B5}, + + // TODO add more as needed +}; + +const CpuVendorInfo* FindCpuVendorInfo(cpuinfo_vendor vendor) { + const auto vendor_mapping_it = std::find_if(kCpuVendorInfos.begin(), kCpuVendorInfos.end(), + [vendor](const CpuVendorInfo& entry) { + return entry.vendor == vendor; + }); + + if (vendor_mapping_it != kCpuVendorInfos.end()) { + return &*vendor_mapping_it; + } + + return nullptr; +} + +} // namespace + +void CPUIDInfo::VendorInfoInit() { + const cpuinfo_vendor vendor = [&]() { + cpuinfo_vendor result = cpuinfo_vendor_unknown; +#if defined(CPUINFO_SUPPORTED) + if (pytorch_cpuinfo_init_) { + const auto* processor = cpuinfo_get_processor(0); + if (processor && processor->core) { + result = processor->core->vendor; + } + } +#endif // defined(CPUINFO_SUPPORTED) + return result; + }(); + + const auto* vendor_info = FindCpuVendorInfo(vendor); + if (vendor_info == nullptr) { + LogEarlyWarning(MakeString("Unknown CPU vendor. 
cpuinfo_vendor value: ", static_cast(vendor))); + vendor_info = &kUnknownCpuVendorInfo; + } + + vendor_ = vendor_info->name; + vendor_id_ = vendor_info->id; +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/common/string_utils.h b/onnxruntime/core/common/string_utils.h index c2e26f629330f..d8d943d6e9a41 100644 --- a/onnxruntime/core/common/string_utils.h +++ b/onnxruntime/core/common/string_utils.h @@ -61,10 +61,11 @@ inline void TrimStringFromRight(std::string& s) { * @param s The string to trim. * @return The trimmed string. */ -inline std::string TrimString(std::string s) { - TrimStringFromRight(s); - TrimStringFromLeft(s); - return s; +inline std::string TrimString(std::string_view s) { + std::string s_trimmed{s}; + TrimStringFromRight(s_trimmed); + TrimStringFromLeft(s_trimmed); + return s_trimmed; } /** diff --git a/onnxruntime/core/platform/apple/device_discovery.cc b/onnxruntime/core/platform/apple/device_discovery.cc new file mode 100644 index 0000000000000..767b834e38756 --- /dev/null +++ b/onnxruntime/core/platform/apple/device_discovery.cc @@ -0,0 +1,106 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/platform/device_discovery.h" + +#include +#include + +#include "core/common/logging/logging.h" + +namespace onnxruntime { + +namespace { + +constexpr auto kApplePciVendorId = 0x106B; +constexpr auto kAppleVendorName = "Apple"; + +std::vector GetGpuDevices() { + std::vector result{}; + + // For now, we assume the existence of one GPU if it is a Mac with Apple Silicon. 
+ // TODO support iOS + // TODO support Intel Macs which may have more than one GPU +#if TARGET_OS_OSX && TARGET_CPU_ARM64 + { + OrtHardwareDevice gpu_device{}; + gpu_device.type = OrtHardwareDeviceType_GPU; + gpu_device.vendor_id = kApplePciVendorId; + gpu_device.vendor = kAppleVendorName; + + result.emplace_back(std::move(gpu_device)); + } +#endif // TARGET_OS_OSX && TARGET_CPU_ARM64 + + return result; +} + +bool HasAppleNeuralEngine() { + // Copied from onnxruntime/core/providers/coreml/builders/helper.cc:HasNeuralEngine(). + bool has_apple_neural_engine = false; + + struct utsname system_info; + uname(&system_info); + LOGS_DEFAULT(VERBOSE) << "Current Apple hardware info: " << system_info.machine; + +#if TARGET_OS_IPHONE + // utsname.machine has device identifier. For example, identifier for iPhone Xs is "iPhone11,2". + // Since Neural Engine is only available for use on A12 and later, major device version in the + // identifier is checked for these models: + // A12: iPhone XS (11,2), iPad Mini - 5th Gen (11,1) + // A12X: iPad Pro - 3rd Gen (8,1) + // For more information, see https://www.theiphonewiki.com/wiki/Models + size_t str_len = strnlen(system_info.machine, onnxruntime::kMaxStrLen); + if (str_len > 4 && strncmp("iPad", system_info.machine, 4) == 0) { + const int major_version = atoi(system_info.machine + 4); + has_apple_neural_engine = major_version >= 8; // There are no devices between iPad 8 and 11. + } else if (str_len > 6 && strncmp("iPhone", system_info.machine, 6) == 0) { + const int major_version = atoi(system_info.machine + 6); + has_apple_neural_engine = major_version >= 11; + } +#elif TARGET_OS_OSX && TARGET_CPU_ARM64 + // Only Mac with arm64 CPU (Apple Silicon) has ANE. 
+ has_apple_neural_engine = true; +#endif // #if TARGET_OS_IPHONE + + return has_apple_neural_engine; +} + +std::vector GetNpuDevices() { + std::vector result{}; + + if (HasAppleNeuralEngine()) { + OrtHardwareDevice npu_device{}; + npu_device.type = OrtHardwareDeviceType_NPU; + npu_device.vendor_id = kApplePciVendorId; + npu_device.vendor = kAppleVendorName; + + result.emplace_back(std::move(npu_device)); + } + + return result; +} + +} // namespace + +std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { + std::unordered_set devices; + + // get CPU devices + devices.insert(GetCpuDeviceFromCPUIDInfo()); + + // get GPU devices + { + auto gpu_devices = GetGpuDevices(); + devices.insert(gpu_devices.begin(), gpu_devices.end()); + } + + // get NPU devices + { + auto npu_devices = GetNpuDevices(); + devices.insert(npu_devices.begin(), npu_devices.end()); + } + + return devices; +} +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/device_discovery.h b/onnxruntime/core/platform/device_discovery.h index 70be10bf09e4e..b49e63b90236a 100644 --- a/onnxruntime/core/platform/device_discovery.h +++ b/onnxruntime/core/platform/device_discovery.h @@ -3,25 +3,24 @@ #pragma once -#include #include #include "core/session/abi_devices.h" + namespace onnxruntime { class DeviceDiscovery { public: - static std::unordered_set& GetDevices() { - // assumption: devices don't change. we assume the machine must be shutdown to change cpu/gpu/npu devices. - // technically someone could disable/enable a device in a running OS. we choose not to add complexity to support - // that scenario. - static std::unordered_set devices(DiscoverDevicesForPlatform()); - return devices; - } + static const std::unordered_set& GetDevices(); private: DeviceDiscovery() = default; + // platform specific code implements this method static std::unordered_set DiscoverDevicesForPlatform(); + + // Gets a CPU device by querying `CPUIDInfo`. 
+ static OrtHardwareDevice GetCpuDeviceFromCPUIDInfo(); }; + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/device_discovery_common.cc b/onnxruntime/core/platform/device_discovery_common.cc new file mode 100644 index 0000000000000..dcba31aed6fec --- /dev/null +++ b/onnxruntime/core/platform/device_discovery_common.cc @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// This file contains platform-agnostic device discovery implementation. + +#include "core/platform/device_discovery.h" + +#include + +#include "core/common/cpuid_info.h" +#include "core/common/logging/logging.h" + +namespace onnxruntime { + +const std::unordered_set& DeviceDiscovery::GetDevices() { + // assumption: devices don't change. we assume the machine must be shutdown to change cpu/gpu/npu devices. + // technically someone could disable/enable a device in a running OS. we choose not to add complexity to support + // that scenario. + static std::unordered_set devices = []() { + auto discovered_devices = DiscoverDevicesForPlatform(); + + // log discovered devices + for (const auto& ortdevice : discovered_devices) { + std::ostringstream oss; + oss << "Discovered OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id + << ", device_id:0x" << ortdevice.device_id + << ", vendor:" << ortdevice.vendor + << ", type:" << std::dec << static_cast(ortdevice.type) + << ", metadata: ["; + for (auto& [key, value] : ortdevice.metadata.Entries()) { + oss << key << "=" << value << ", "; + } + oss << "]}"; + LOGS_DEFAULT(INFO) << oss.str(); + } + + return discovered_devices; + }(); + + return devices; +} + +OrtHardwareDevice DeviceDiscovery::GetCpuDeviceFromCPUIDInfo() { + const auto& cpuid_info = CPUIDInfo::GetCPUIDInfo(); + + OrtHardwareDevice cpu_device{}; + cpu_device.vendor = cpuid_info.GetCPUVendor(); + cpu_device.vendor_id = cpuid_info.GetCPUVendorId(); + cpu_device.device_id = 0; + cpu_device.type = 
OrtHardwareDeviceType_CPU; + + return cpu_device; +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/posix/device_discovery.cc b/onnxruntime/core/platform/device_discovery_default.cc similarity index 57% rename from onnxruntime/core/platform/posix/device_discovery.cc rename to onnxruntime/core/platform/device_discovery_default.cc index 82564539ab5d4..73ddf516034ab 100644 --- a/onnxruntime/core/platform/posix/device_discovery.cc +++ b/onnxruntime/core/platform/device_discovery_default.cc @@ -4,14 +4,16 @@ #include "core/platform/device_discovery.h" namespace onnxruntime { + std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { - std::unordered_set devices; - // get CPU devices + // This is a default implementation. + // We assume that there is a CPU device and do not attempt to discover anything else. - // get GPU devices + std::unordered_set devices{}; - // get NPU devices + devices.emplace(GetCpuDeviceFromCPUIDInfo()); return devices; } + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc new file mode 100644 index 0000000000000..6a02a1b46028f --- /dev/null +++ b/onnxruntime/core/platform/linux/device_discovery.cc @@ -0,0 +1,173 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/platform/device_discovery.h" + +#include +#include +#include +#include + +#include "core/common/common.h" +#include "core/common/logging/logging.h" +#include "core/common/parse_string.h" +#include "core/common/string_utils.h" + +namespace fs = std::filesystem; + +namespace onnxruntime { + +namespace { + +Status ErrorCodeToStatus(const std::error_code& ec) { + if (!ec) { + return Status::OK(); + } + + return Status{common::StatusCategory::ONNXRUNTIME, common::StatusCode::FAIL, + MakeString("Error: std::error_code with category name: ", ec.category().name(), + ", value: ", ec.value(), ", message: ", ec.message())}; +} + +struct GpuSysfsPathInfo { + size_t card_idx; + fs::path path; +}; + +Status DetectGpuSysfsPaths(std::vector& gpu_sysfs_paths_out) { + std::error_code error_code{}; + const fs::path sysfs_class_drm_path = "/sys/class/drm"; + const bool sysfs_class_drm_path_exists = fs::exists(sysfs_class_drm_path, error_code); + ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code)); + + if (!sysfs_class_drm_path_exists) { + gpu_sysfs_paths_out = std::vector{}; + return Status::OK(); + } + + const auto detect_card_path = [](const fs::path& sysfs_path, size_t& card_idx) -> bool { + const auto filename = sysfs_path.filename(); + const auto filename_str = std::string_view{filename.native()}; + + // Look for a filename matching "cardN". N is a number. 
+ constexpr std::string_view prefix = "card"; + if (filename_str.find(prefix) != 0) { + return false; + } + + size_t parsed_card_idx{}; + if (!TryParseStringWithClassicLocale(filename_str.substr(prefix.size()), parsed_card_idx)) { + return false; + } + + card_idx = parsed_card_idx; + return true; + }; + + std::vector gpu_sysfs_paths{}; + + auto dir_iterator = fs::directory_iterator{sysfs_class_drm_path, error_code}; + ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code)); + + for (const auto& dir_item : dir_iterator) { + const auto& dir_item_path = dir_item.path(); + + if (size_t card_idx{}; detect_card_path(dir_item_path, card_idx)) { + GpuSysfsPathInfo path_info{}; + path_info.card_idx = card_idx; + path_info.path = dir_item_path; + gpu_sysfs_paths.emplace_back(std::move(path_info)); + } + } + + gpu_sysfs_paths_out = std::move(gpu_sysfs_paths); + return Status::OK(); +} + +Status ReadFileContents(const fs::path& file_path, std::string& contents) { + std::ifstream file{file_path}; + ORT_RETURN_IF_NOT(file, "Failed to open file: ", file_path); + std::istreambuf_iterator file_begin{file}, file_end{}; + contents.assign(file_begin, file_end); + return Status::OK(); +} + +template +Status ReadValueFromFile(const fs::path& file_path, ValueType& value) { + std::string file_text{}; + ORT_RETURN_IF_ERROR(ReadFileContents(file_path, file_text)); + file_text = utils::TrimString(file_text); + return ParseStringWithClassicLocale(file_text, value); +} + +Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevice& gpu_device_out) { + OrtHardwareDevice gpu_device{}; + const auto& sysfs_path = path_info.path; + + // vendor id + { + const auto vendor_id_path = sysfs_path / "device" / "vendor"; + ORT_RETURN_IF_ERROR(ReadValueFromFile(vendor_id_path, gpu_device.vendor_id)); + } + + // TODO vendor name + + // device id + { + const auto device_id_path = sysfs_path / "device" / "device"; + ORT_RETURN_IF_ERROR(ReadValueFromFile(device_id_path, gpu_device.device_id)); 
+ } + + // metadata + gpu_device.metadata.Add("card_idx", MakeString(path_info.card_idx)); + // TODO is card discrete? + + gpu_device.type = OrtHardwareDeviceType_GPU; + + gpu_device_out = std::move(gpu_device); + return Status::OK(); +} + +Status GetGpuDevices(std::vector& gpu_devices_out) { + std::vector gpu_sysfs_path_infos{}; + ORT_RETURN_IF_ERROR(DetectGpuSysfsPaths(gpu_sysfs_path_infos)); + + std::vector gpu_devices{}; + gpu_devices.reserve(gpu_sysfs_path_infos.size()); + + for (const auto& gpu_sysfs_path_info : gpu_sysfs_path_infos) { + OrtHardwareDevice gpu_device{}; + ORT_RETURN_IF_ERROR(GetGpuDeviceFromSysfs(gpu_sysfs_path_info, gpu_device)); + gpu_devices.emplace_back(std::move(gpu_device)); + } + + gpu_devices_out = std::move(gpu_devices); + return Status::OK(); +} + +} // namespace + +std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatform() { + std::unordered_set devices; + + // get CPU devices + devices.emplace(GetCpuDeviceFromCPUIDInfo()); + + // get GPU devices + { + std::vector gpu_devices{}; + Status gpu_device_discovery_status = GetGpuDevices(gpu_devices); + if (gpu_device_discovery_status.IsOK()) { + devices.insert(std::make_move_iterator(gpu_devices.begin()), + std::make_move_iterator(gpu_devices.end())); + } else { + LOGS_DEFAULT(WARNING) << "GPU device discovery failed: " << gpu_device_discovery_status.ErrorMessage(); + } + } + + // get NPU devices + // TODO figure out how to discover these + + return devices; +} +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/windows/device_discovery.cc b/onnxruntime/core/platform/windows/device_discovery.cc index ff904ddb3e7e0..cf761f587ad0b 100644 --- a/onnxruntime/core/platform/windows/device_discovery.cc +++ b/onnxruntime/core/platform/windows/device_discovery.cc @@ -635,19 +635,6 @@ std::unordered_set DeviceDiscovery::DiscoverDevicesForPlatfor } } - std::ostringstream oss; - oss << "Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id - << ", 
device_id:0x" << ortdevice.device_id - << ", vendor:" << ortdevice.vendor - << ", type:" << std::dec << static_cast(ortdevice.type) - << ", metadata: ["; - for (auto& [key, value] : ortdevice.metadata.Entries()) { - oss << key << "=" << value << ", "; - } - - oss << "]}" << std::endl; - LOGS_DEFAULT(INFO) << oss.str(); - return ortdevice; }; diff --git a/onnxruntime/test/common/string_utils_test.cc b/onnxruntime/test/common/string_utils_test.cc index 79f8ddff7b52a..983f7fa7a87f9 100644 --- a/onnxruntime/test/common/string_utils_test.cc +++ b/onnxruntime/test/common/string_utils_test.cc @@ -15,6 +15,8 @@ namespace test { namespace { template void TestSuccessfulParse(const std::string& input, const T& expected_value) { + SCOPED_TRACE(MakeString("Input: \"", input, "\", expected_value: ", expected_value)); + T value; ASSERT_TRUE(TryParseStringWithClassicLocale(input, value)); EXPECT_EQ(value, expected_value); @@ -22,6 +24,8 @@ void TestSuccessfulParse(const std::string& input, const T& expected_value) { template void TestFailedParse(const std::string& input) { + SCOPED_TRACE(MakeString("Input: \"", input, "\"")); + T value; EXPECT_FALSE(TryParseStringWithClassicLocale(input, value)); } @@ -31,6 +35,7 @@ TEST(StringUtilsTest, TryParseStringWithClassicLocale) { TestSuccessfulParse("-1", -1); TestSuccessfulParse("42", 42u); TestSuccessfulParse("2.5", 2.5f); + TestSuccessfulParse("0x100", uint32_t{0x100}); // out of range TestFailedParse("32768"); diff --git a/onnxruntime/test/platform/device_discovery_test.cc b/onnxruntime/test/platform/device_discovery_test.cc new file mode 100644 index 0000000000000..21ddf9a5b1cd7 --- /dev/null +++ b/onnxruntime/test/platform/device_discovery_test.cc @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/platform/device_discovery.h" + +#include "gtest/gtest.h" + +namespace onnxruntime::test { + +namespace { + +std::vector GetDevicesByType(OrtHardwareDeviceType device_type) { + std::vector result{}; + const auto& devices = DeviceDiscovery::GetDevices(); + std::copy_if(devices.begin(), devices.end(), std::back_inserter(result), + [device_type](const OrtHardwareDevice& device) { + return device.type == device_type; + }); + return result; +} + +} // namespace + +TEST(DeviceDiscoveryTest, HasCpuDevice) { + const auto cpu_devices = GetDevicesByType(OrtHardwareDeviceType_CPU); + ASSERT_GT(cpu_devices.size(), 0); + +#if !defined(__wasm__) + ASSERT_NE(cpu_devices[0].vendor_id, 0); +#endif // !defined(__WASM__) +} + +} // namespace onnxruntime::test diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index c8ef3e22b43f1..dd3e096c0334b 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1515,8 +1515,8 @@ def adb_push(src, dest, **kwargs): def adb_shell(*args, **kwargs): return run_subprocess([sdk_tool_paths.adb, "shell", *args], **kwargs) - def adb_install(*args, **kwargs): - return run_subprocess([sdk_tool_paths.adb, "install", *args], **kwargs) + def adb_logcat(*args, **kwargs): + return run_subprocess([sdk_tool_paths.adb, "logcat", *args], **kwargs) def run_adb_shell(cmd): # GCOV_PREFIX_STRIP specifies the depth of the directory hierarchy to strip and @@ -1542,6 +1542,17 @@ def run_adb_shell(cmd): ) context_stack.callback(android.stop_emulator, emulator_proc) + all_android_tests_passed = False + + def dump_logs_on_failure(): + if not all_android_tests_passed: + log.warning("Android test failed. 
Dumping logs.") + adb_logcat("-d") # dump logs + + context_stack.callback(dump_logs_on_failure) + + adb_logcat("-c") # clear logs + adb_push("testdata", device_dir, cwd=cwd) if is_linux() and os.path.exists("/data/onnx"): adb_push("/data/onnx", device_dir + "/test", cwd=cwd) @@ -1593,6 +1604,8 @@ def run_adb_shell(cmd): f"LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{device_dir} {device_dir}/onnxruntime_customopregistration_test" ) + all_android_tests_passed = True + def run_ios_tests(args, source_dir, config, cwd): is_targeting_iphone_simulator = "iphonesimulator" in args.apple_sysroot.lower()