Skip to content
Prev Previous commit
Next Next commit
Review comments
  • Loading branch information
kunalspathak committed Jun 21, 2022
commit 468dc4a479b74e580abfc1ea02f68e672537842a
62 changes: 38 additions & 24 deletions src/coreclr/gc/unix/gcenv.unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if ((cacheSize == 0) || (cacheLevel != 3))
if (cacheSize == 0)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// on most machines.
Expand All @@ -947,30 +947,12 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
// If we use recent high core count chips as a guide for state of the art, we find
// total L3 cache to be 1-2MB/core. As always, there are exceptions.

// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
// Estimate cache size based on CPU count
// Assume parts with lower core counts are lighter-weight parts, which are likely to have smaller caches
// Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs
DWORD logicalCPUs = g_totalCpuCount;
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize = cacheSize * 1024;
cacheSize = logicalCPUs * std::min(1536, std::max(256, (int)logicalCPUs * 128)) * 1024;
}
#endif

Expand All @@ -995,6 +977,38 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
}
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if (cacheLevel != 3)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// on most machines.
// Hence, fall back to the following heuristic, based on the CPU count:
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
DWORD logicalCPUs = g_totalCpuCount;
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize *= (1024 * 1024);
}
#endif

return cacheSize;
}

Expand Down Expand Up @@ -1071,7 +1085,7 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
s_maxSize = maxSize;
s_maxTrueSize = maxTrueSize;

// printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
// printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize);
return trueSize ? maxTrueSize : maxSize;
}

Expand Down
24 changes: 14 additions & 10 deletions src/coreclr/gc/windows/gcenv.windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ size_t GetLogicalProcessorCacheSizeFromOS()
{
size_t cache_size = 0;
size_t cache_level = 0;
uint32_t totalCPUCount = 0;

DWORD nEntries = 0;

Expand Down Expand Up @@ -432,6 +433,10 @@ size_t GetLogicalProcessorCacheSizeFromOS()
cache_level = pslpi[i].Cache.Level;
}
}
else if (pslpi[i].Relationship == RelationProcessorCore)
{
totalCPUCount++;
}
}
cache_size = last_cache_size;
}
Expand All @@ -450,25 +455,24 @@ size_t GetLogicalProcessorCacheSizeFromOS()
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
uint32_t logicalCPUs = GetTotalProcessorCount();
if (logicalCPUs < 5)
if (totalCPUCount < 5)
{
cacheSize = 4;
cache_size = 4;
}
else if (logicalCPUs < 17)
else if (totalCPUCount < 17)
{
cacheSize = 8;
cache_size = 8;
}
else if (logicalCPUs < 65)
else if (totalCPUCount < 65)
{
cacheSize = 16;
cache_size = 16;
}
else
{
cacheSize = 32;
cache_size = 32;
}

cacheSize *= 1024;
cache_size *= (1024 * 1024);
}
#endif // TARGET_ARM64

Expand Down Expand Up @@ -877,7 +881,7 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
s_maxSize = maxSize;
s_maxTrueSize = maxTrueSize;

// printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
// printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize);
return trueSize ? maxTrueSize : maxSize;
}

Expand Down
60 changes: 37 additions & 23 deletions src/coreclr/pal/src/misc/sysinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ PAL_GetLogicalProcessorCacheSizeFromOS()
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if ((cacheSize == 0) || (cacheLevel != 3))
if (cacheSize == 0)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// on most machines, with exceptions on some machines.
Expand All @@ -612,30 +612,12 @@ PAL_GetLogicalProcessorCacheSizeFromOS()
// If we use recent high core count chips as a guide for state of the art, we find
// total L3 cache to be 1-2MB/core. As always, there are exceptions.

// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
// Estimate cache size based on CPU count
// Assume parts with lower core counts are lighter-weight parts, which are likely to have smaller caches
// Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs
DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS();
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize = cacheSize * 1024;
cacheSize = logicalCPUs*std::min(1536, std::max(256, (int)logicalCPUs*128))*1024;
}
#endif

Expand All @@ -660,5 +642,37 @@ PAL_GetLogicalProcessorCacheSizeFromOS()
}
#endif

#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX)
if (cacheLevel != 3)
{
// We expect to get the L3 cache size for Arm64, but that info is currently expected to be missing
// on most machines.
// Hence, fall back to the following heuristic, based on the CPU count:
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
DWORD logicalCPUs = g_totalCpuCount;
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize *= (1024 * 1024);
}
#endif

return cacheSize;
}
33 changes: 20 additions & 13 deletions src/coreclr/vm/gcenv.os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ size_t GetLogicalProcessorCacheSizeFromOS()
{
size_t cache_size = 0;
size_t cache_level = 0;
uint32_t totalCPUCount = 0;
DWORD nEntries = 0;

// Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. Returns NULL
Expand All @@ -609,10 +610,17 @@ size_t GetLogicalProcessorCacheSizeFromOS()

for (DWORD i=0; i < nEntries; i++)
{
if (last_cache_size < pslpi[i].Cache.Size)
if (pslpi[i].Relationship == RelationCache)
{
last_cache_size = pslpi[i].Cache.Size;
cache_level = pslpi[i].Cache.Level;
if (last_cache_size < pslpi[i].Cache.Size)
{
last_cache_size = pslpi[i].Cache.Size;
cache_level = pslpi[i].Cache.Level;
}
}
else if (pslpi[i].Relationship == RelationProcessorCore)
{
totalCPUCount++;
}
}
cache_size = last_cache_size;
Expand All @@ -632,25 +640,24 @@ size_t GetLogicalProcessorCacheSizeFromOS()
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
uint32_t logicalCPUs = GetTotalProcessorCount();
if (logicalCPUs < 5)
if (totalCPUCount < 5)
{
cacheSize = 4;
cache_size = 4;
}
else if (logicalCPUs < 17)
else if (totalCPUCount < 17)
{
cacheSize = 8;
cache_size = 8;
}
else if (logicalCPUs < 65)
else if (totalCPUCount < 65)
{
cacheSize = 16;
cache_size = 16;
}
else
{
cacheSize = 32;
cache_size = 32;
}

cacheSize *= 1024;
cache_size *= (1024 * 1024);
}
#endif // TARGET_ARM64

Expand Down Expand Up @@ -683,7 +690,7 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
s_maxSize = maxSize;
s_maxTrueSize = maxTrueSize;

// printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
// printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize);
return trueSize ? maxTrueSize : maxSize;
}

Expand Down