Skip to content
Closed
51 changes: 27 additions & 24 deletions src/coreclr/gc/unix/gcenv.unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -915,30 +915,33 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
#endif

#if defined(HOST_ARM64) && !defined(TARGET_OSX)
if (cacheSize == 0)
{
// It is currently expected to be missing cache size info
//
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
//
// /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems.
// Arm64 patch is in Linux kernel tip.
//
// midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
// but without an exhaustive list of ARM64 processors any decode of midr_el1
// Would likely be incomplete

// Published information on ARM64 architectures is limited.
// If we use recent high core count chips as a guide for state of the art, we find
// total L3 cache to be 1-2MB/core. As always, there are exceptions.

// Estimate cache size based on CPU count
// Assume lower core count are lighter weight parts which are likely to have smaller caches
// Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs
DWORD logicalCPUs = g_totalCpuCount;

cacheSize = logicalCPUs * std::min(1536, std::max(256, (int)logicalCPUs * 128)) * 1024;
}
// It is currently expected to be missing cache size info
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A lot of information in this comment is no longer relevant. Could you please update this comment to only include what is still relevant?

//
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
//
// /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems.
// Arm64 patch is in Linux kernel tip.
//
// midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
// but without an exhaustive list of ARM64 processors any decode of midr_el1
// would likely be incomplete.

// Published information on ARM64 architectures is limited.
// If we use recent high core count chips as a guide for state of the art, we find
// total L3 cache to be 1-2MB/core. As always, there are exceptions.

// Estimate cache size based on CPU count.
// Assume parts with lower core counts are lighter-weight and therefore likely to have smaller caches.
// Assume shared L3 grows linearly from 256KB to 4MB as logicalCPUs grows from 2 to 32 CPUs.

// As of 2022, in most cases /sys/devices/system/cpu/cpu*/cache/index*/ is present, but it only
// reports the L2 cache size and says nothing about L3 even if it exists. In this case we don't want
// to be stuck with L2 (e.g. 256KB on our test machine, whereas the real L3 is 32MB).
// More details: https://github.com/dotnet/runtime/issues/60166
DWORD logicalCPUs = GCToOSInterface::GetTotalProcessorCount();

size_t predictedSize = std::min(4096, std::max(256, (int)logicalCPUs*128))*1024;
cacheSize = std::max(predictedSize, cacheSize);
#endif

#if HAVE_SYSCTLBYNAME
Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/gc/windows/gcenv.windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,21 @@ size_t GetLogicalProcessorCacheSizeFromOS()
if(pslpi)
delete[] pslpi; // release the memory allocated for the SLPI array.


#ifdef TARGET_ARM64
// GetLogicalProcessorInformation doesn't report L3 cache size on our win-arm64 environment (the current cache_size
// most likely represents L2 instead). We use a processor-count based heuristic to predict the L3 size and pick
// whichever value is bigger. The same heuristic is used for Linux-arm64.
// More info: https://github.com/dotnet/runtime/issues/60166
uint32_t logicalCPUs = GCToOSInterface::GetTotalProcessorCount();

// Estimate cache size based on CPU count.
// Assume parts with lower core counts are lighter-weight and therefore likely to have smaller caches.
// Assume shared L3 grows linearly from 256KB to 4MB as logicalCPUs grows from 2 to 32 CPUs.
size_t predictedSize = min(4096, max(256, logicalCPUs * 128)) * 1024;
cache_size = max(predictedSize, cache_size);
#endif

return cache_size;
}

Expand Down
51 changes: 27 additions & 24 deletions src/coreclr/pal/src/misc/sysinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,30 +582,33 @@ PAL_GetLogicalProcessorCacheSizeFromOS()
#endif

#if defined(HOST_ARM64) && !defined(TARGET_OSX)
if (cacheSize == 0)
{
// It is currently expected to be missing cache size info
//
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
//
// /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems.
// Arm64 patch is in Linux kernel tip.
//
// midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
// but without an exhaustive list of ARM64 processors any decode of midr_el1
// Would likely be incomplete

// Published information on ARM64 architectures is limited.
// If we use recent high core count chips as a guide for state of the art, we find
// total L3 cache to be 1-2MB/core. As always, there are exceptions.

// Estimate cache size based on CPU count
// Assume lower core count are lighter weight parts which are likely to have smaller caches
// Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs
DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS();

cacheSize = logicalCPUs*std::min(1536, std::max(256, (int)logicalCPUs*128))*1024;
}
// It is currently expected to be missing cache size info
//
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
//
// /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems.
// Arm64 patch is in Linux kernel tip.
//
// midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
// but without an exhaustive list of ARM64 processors any decode of midr_el1
// would likely be incomplete.

// Published information on ARM64 architectures is limited.
// If we use recent high core count chips as a guide for state of the art, we find
// total L3 cache to be 1-2MB/core. As always, there are exceptions.

// Estimate cache size based on CPU count.
// Assume parts with lower core counts are lighter-weight and therefore likely to have smaller caches.
// Assume shared L3 grows linearly from 256KB to 4MB as logicalCPUs grows from 2 to 32 CPUs.

// As of 2022, in most cases /sys/devices/system/cpu/cpu*/cache/index*/ is present, but it only
// reports the L2 cache size and says nothing about L3 even if it exists. In this case we don't want
// to be stuck with L2 (e.g. 256KB on our test machine, whereas the real L3 is 32MB).
// More details: https://github.com/dotnet/runtime/issues/60166
DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS();

size_t predictedSize = std::min(4096, std::max(256, (int)logicalCPUs*128))*1024;
cacheSize = std::max(predictedSize, cacheSize);
#endif

#if HAVE_SYSCTLBYNAME
Expand Down