Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
da29c61
Add dependencies
Mar 4, 2020
423d830
Add config var
Mar 4, 2020
ad228ef
Move portable RegisteredWaitHandle implementation to shared ThreadPoo…
Mar 9, 2020
1155fb0
Merge RegisteredWaitHandle implementations
Mar 4, 2020
6f9b3dc
Separate portable-only portion of RegisteredWaitHandle
May 24, 2020
d792cd7
Fix timers, tiered compilation, introduced time-sensitive work item q…
Mar 15, 2020
a911883
Implement ResetThreadPoolThread, set thread names for diagnostics
Mar 15, 2020
40ce9d0
Cache-line-separate PortableThreadPool._numRequestedWorkers similarly…
Mar 16, 2020
a5d3c2b
Post wait completions to the IO completion port on Windows for corecl…
Mar 17, 2020
ad632be
Reroute managed gate thread into unmanaged side to perform gate activ…
Mar 18, 2020
f942773
Flow config values from CoreCLR to the portable thread pool for compat
Mar 22, 2020
0ef2079
Port - 44970522045f0c323f5735c4fe4b54bd8f71e800 - Fix hill climbing f…
Mar 23, 2020
3151057
Port - aa5ce2b1bc9ac553ddd493e269a53c999d61e965 - Limit min threads i…
Mar 23, 2020
5d8d4ae
Port - 8cc2aa35933677339b9e9ec5485754aa750907df - Optimize AdjustMaxW…
Mar 24, 2020
3916619
Fix ETW events
Mar 25, 2020
88ea9c4
Fix perf of counts structs
Mar 27, 2020
8ef63f9
Fix perf of dispatch loop
Mar 28, 2020
e01df4f
Fix perf of ThreadInt64PersistentCounter
Mar 29, 2020
7c89f81
Miscellaneous perf fixes
Apr 1, 2020
13c2d62
Fix starvation heuristic
Jun 1, 2020
dd23472
Implement worker tracking
May 23, 2020
6cd2b2d
Use smaller stack size for threads that don't run user code
May 27, 2020
c8bdfad
Note some SOS dependencies, small fixes in hill climbing to make equi…
Jun 1, 2020
38c00d5
Port some tests from CoreRT
Jun 7, 2020
0ff3b03
Fail-fast in thread pool native entry points specific to thread pool …
Jun 7, 2020
cc35f92
Fix SetMinThreads() and SetMaxThreads() to return true only when both…
Jun 8, 2020
79eb3b9
Fix registered wait removals for fairness since there can be duplicat…
Jun 9, 2020
8b309cc
Allow multiple DotNETRuntime event providers/sources in EventPipe
Jun 12, 2020
ac1917d
Fix registered wait handle timeout logic in the wait thread
Jun 16, 2020
73f911c
Fix Browser build
Jun 22, 2020
1b99da7
Remove unnecessary methods from Browser thread pool, address some fee…
Jun 24, 2020
17b3d2d
Fix build warning after merge
Jun 30, 2020
e8043ff
Address feedback for events, undo EventPipe change in mono, change ev…
Jun 30, 2020
8d9cc2d
Disable new timer test for browser
Jul 1, 2020
f03709b
Fix EventSource tests to expect the new provider name used in mono
Jul 1, 2020
ff48985
Revert event source name in mono to match coreclr
Jul 3, 2020
93563e6
Add issue link for prerequisites of enabling the portable thread pool…
Jul 3, 2020
100dad7
Address feedback
Jul 6, 2020
4c7f778
Fix race condition in registered wait unregister
Jul 11, 2020
d55c5fc
Fix a new issue in WaitThread after shifting items in arrays
Jul 12, 2020
996597f
Fix usage of LowLevelMonitor after rebase
Sep 10, 2020
82f7cda
Fix tests that use RemoteExecutor to include a test for whether it is…
Sep 11, 2020
b53a5b9
Fix build
Sep 11, 2020
7dcb267
Slight fix to starvation heuristic to be closer to what it was before
Sep 25, 2020
65840d8
Fix license headers in new files
Oct 9, 2020
fb28ad2
Address feedback
Oct 9, 2020
04326b6
Change pattern for accessing the event source for better ILLinker com…
Oct 16, 2020
ec038cf
Move hill climbing config reads into constructor instead of passing t…
Oct 16, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Miscellaneous perf fixes
  • Loading branch information
Koundinya Veluri committed Oct 20, 2020
commit 7c89f816ec47be62ef4cd28967ab6278c8d272e2
Original file line number Diff line number Diff line change
Expand Up @@ -423,11 +423,14 @@ private static void UnsafeQueueUnmanagedWorkItem(IntPtr callback, IntPtr state)
private static extern void GetAvailableThreadsNative(out int workerThreads, out int completionPortThreads);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool NotifyWorkItemComplete(object? threadLocalCompletionCountObject)
internal static bool NotifyWorkItemComplete(object? threadLocalCompletionCountObject, int currentTimeMs)
{
if (UsePortableThreadPool)
{
return PortableThreadPool.ThreadPoolInstance.NotifyWorkItemComplete(threadLocalCompletionCountObject);
return
PortableThreadPool.ThreadPoolInstance.NotifyWorkItemComplete(
threadLocalCompletionCountObject,
currentTimeMs);
}

return NotifyWorkItemCompleteNative();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -666,9 +666,28 @@ private void EnqueueSlow(T item)
/// true if an element was removed and returned from the beginning of the
/// <see cref="ConcurrentQueue{T}"/> successfully; otherwise, false.
/// </returns>
public bool TryDequeue([MaybeNullWhen(false)] out T result) =>
_head.TryDequeue(out result) || // fast-path that operates just on the head segment
TryDequeueSlow(out result); // slow path that needs to fix up segments
public bool TryDequeue([MaybeNullWhen(false)] out T result)
{
    // Snapshot the head segment once so that both checks below inspect the same segment.
    ConcurrentQueueSegment<T> segment = _head;

    // Fast path: the item is taken directly from the snapshotted head segment.
    if (segment.TryDequeue(out result))
    {
        return true;
    }

    // A following segment exists, so fall back to the slow path that fixes up segments.
    if (segment._nextSegment != null)
    {
        return TryDequeueSlow(out result);
    }

    // There is no following segment: treat this as a moment-in-time empty queue, even
    // though another item may have arrived between the dequeue attempt and this check.
    result = default!;
    return false;
}

/// <summary>Tries to dequeue an item, removing empty segments as needed.</summary>
private bool TryDequeueSlow([MaybeNullWhen(false)] out T item)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ internal sealed partial class LowLevelLifoSemaphore : IDisposable

private readonly int _maximumSignalCount;
private readonly int _spinCount;
private readonly Action _onWait;

private const int SpinSleep0Threshold = 10;

public LowLevelLifoSemaphore(int initialSignalCount, int maximumSignalCount, int spinCount)
public LowLevelLifoSemaphore(int initialSignalCount, int maximumSignalCount, int spinCount, Action onWait)
{
Debug.Assert(initialSignalCount >= 0);
Debug.Assert(initialSignalCount <= maximumSignalCount);
Expand All @@ -30,6 +31,7 @@ public LowLevelLifoSemaphore(int initialSignalCount, int maximumSignalCount, int
_separated._counts.SignalCount = (uint)initialSignalCount;
_maximumSignalCount = maximumSignalCount;
_spinCount = spinCount;
_onWait = onWait;

Create(maximumSignalCount);
}
Expand All @@ -38,6 +40,8 @@ public bool Wait(int timeoutMs)
{
Debug.Assert(timeoutMs >= -1);

int spinCount = _spinCount;

// Try to acquire the semaphore or
// a) register as a spinner if spinCount > 0 and timeoutMs > 0
// b) register as a waiter if there are already too many spinners or spinCount == 0 and timeoutMs > 0
Expand All @@ -53,7 +57,7 @@ public bool Wait(int timeoutMs)
}
else if (timeoutMs != 0)
{
if (_spinCount > 0 && newCounts.SpinnerCount < byte.MaxValue)
if (spinCount > 0 && newCounts.SpinnerCount < byte.MaxValue)
{
newCounts.IncrementSpinnerCount();
}
Expand Down Expand Up @@ -85,9 +89,13 @@ public bool Wait(int timeoutMs)
counts = countsBeforeUpdate;
}

#if CORECLR && TARGET_UNIX
// The PAL's wait subsystem is slower, spin more to compensate for the more expensive wait
spinCount *= 2;
#endif
int processorCount = Environment.ProcessorCount;
int spinIndex = processorCount > 1 ? 0 : SpinSleep0Threshold;
while (spinIndex < _spinCount)
while (spinIndex < spinCount)
{
LowLevelSpinWaiter.Wait(spinIndex, SpinSleep0Threshold, processorCount);
spinIndex++;
Expand Down Expand Up @@ -189,6 +197,8 @@ private bool WaitForSignal(int timeoutMs)
{
Debug.Assert(timeoutMs > 0 || timeoutMs == -1);

_onWait();

while (true)
{
if (!WaitCore(timeoutMs))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Threading
{
Expand All @@ -13,8 +15,6 @@ private static class GateThread
private const int DequeueDelayThresholdMs = GateThreadDelayMs * 2;
private const int GateThreadRunningMask = 0x4;

private static int s_runningState;

private static readonly AutoResetEvent s_runGateThreadEvent = new AutoResetEvent(initialState: true);

private const int MaxRuns = 2;
Expand All @@ -32,6 +32,9 @@ private static void GateThreadStart()
CpuUtilizationReader cpuUtilizationReader = default;
_ = cpuUtilizationReader.CurrentUtilization;

PortableThreadPool threadPoolInstance = ThreadPoolInstance;
LowLevelLock hillClimbingThreadAdjustmentLock = threadPoolInstance._hillClimbingThreadAdjustmentLock;

while (true)
{
s_runGateThreadEvent.WaitOne();
Expand All @@ -42,20 +45,20 @@ private static void GateThreadStart()
Thread.Sleep(GateThreadDelayMs);

int cpuUtilization = cpuUtilizationReader.CurrentUtilization;
ThreadPoolInstance._cpuUtilization = cpuUtilization;
threadPoolInstance._cpuUtilization = cpuUtilization;

needGateThreadForRuntime = ThreadPool.PerformRuntimeSpecificGateActivities(cpuUtilization);

if (!disableStarvationDetection &&
ThreadPoolInstance._separated.numRequestedWorkers > 0 &&
SufficientDelaySinceLastDequeue())
threadPoolInstance._separated.numRequestedWorkers > 0 &&
SufficientDelaySinceLastDequeue(threadPoolInstance))
{
try
{
ThreadPoolInstance._hillClimbingThreadAdjustmentLock.Acquire();
ThreadCounts counts = ThreadPoolInstance._separated.counts.VolatileRead();
hillClimbingThreadAdjustmentLock.Acquire();
ThreadCounts counts = threadPoolInstance._separated.counts.VolatileRead();
// don't add a thread if we're at max or if we are already in the process of adding threads
while (counts.NumExistingThreads < ThreadPoolInstance._maxThreads && counts.NumExistingThreads >= counts.NumThreadsGoal)
while (counts.NumExistingThreads < threadPoolInstance._maxThreads && counts.NumExistingThreads >= counts.NumThreadsGoal)
{
if (debuggerBreakOnWorkStarvation)
{
Expand All @@ -65,11 +68,11 @@ private static void GateThreadStart()
ThreadCounts newCounts = counts;
short newNumThreadsGoal = (short)(newCounts.NumExistingThreads + 1);
newCounts.NumThreadsGoal = newNumThreadsGoal;
ThreadCounts oldCounts = ThreadPoolInstance._separated.counts.InterlockedCompareExchange(newCounts, counts);
ThreadCounts oldCounts = threadPoolInstance._separated.counts.InterlockedCompareExchange(newCounts, counts);
if (oldCounts == counts)
{
HillClimbing.ThreadPoolHillClimber.ForceChange(newNumThreadsGoal, HillClimbing.StateOrTransition.Starvation);
WorkerThread.MaybeAddWorkingWorker();
WorkerThread.MaybeAddWorkingWorker(threadPoolInstance);
break;
}

Expand All @@ -78,62 +81,61 @@ private static void GateThreadStart()
}
finally
{
ThreadPoolInstance._hillClimbingThreadAdjustmentLock.Release();
hillClimbingThreadAdjustmentLock.Release();
}
}
} while (
needGateThreadForRuntime ||
ThreadPoolInstance._separated.numRequestedWorkers > 0 ||
Interlocked.Decrement(ref s_runningState) > GetRunningStateForNumRuns(0));
threadPoolInstance._separated.numRequestedWorkers > 0 ||
Interlocked.Decrement(ref threadPoolInstance._separated.gateThreadRunningState) > GetRunningStateForNumRuns(0));
}
}

// called by logic to spawn new worker threads, return true if it's been too long
// since the last dequeue operation - takes number of worker threads into account
// in deciding "too long"
private static bool SufficientDelaySinceLastDequeue()
private static bool SufficientDelaySinceLastDequeue(PortableThreadPool threadPoolInstance)
{
int delay = Environment.TickCount - Volatile.Read(ref ThreadPoolInstance._separated.lastDequeueTime);
int delay = Environment.TickCount - Volatile.Read(ref threadPoolInstance._separated.lastDequeueTime);

int minimumDelay;

if (ThreadPoolInstance._cpuUtilization < CpuUtilizationLow)
if (threadPoolInstance._cpuUtilization < CpuUtilizationLow)
{
minimumDelay = GateThreadDelayMs;
}
else
{
ThreadCounts counts = ThreadPoolInstance._separated.counts.VolatileRead();
ThreadCounts counts = threadPoolInstance._separated.counts.VolatileRead();
int numThreads = counts.NumThreadsGoal;
minimumDelay = numThreads * DequeueDelayThresholdMs;
}
return delay > minimumDelay;
}

// This is called by a worker thread
internal static void EnsureRunning()
internal static void EnsureRunning(PortableThreadPool threadPoolInstance)
{
int numRunsMask = Interlocked.Exchange(ref s_runningState, GetRunningStateForNumRuns(MaxRuns));
if ((numRunsMask & GateThreadRunningMask) == 0)
// The callers ensure that this speculative load is sufficient to ensure that the gate thread is activated when
// it is needed
if (threadPoolInstance._separated.gateThreadRunningState != GetRunningStateForNumRuns(MaxRuns))
{
bool created = false;
try
{
CreateGateThread();
created = true;
}
finally
{
if (!created)
{
Interlocked.Exchange(ref s_runningState, 0);
}
}
EnsureRunningSlow(threadPoolInstance);
}
else if (numRunsMask == GetRunningStateForNumRuns(0))
}

// Atomically stores the running state for MaxRuns and then, based on the state that was
// replaced, either signals the gate thread event or creates a gate thread.
[MethodImpl(MethodImplOptions.NoInlining)]
internal static void EnsureRunningSlow(PortableThreadPool threadPoolInstance)
{
    int previousState =
        Interlocked.Exchange(
            ref threadPoolInstance._separated.gateThreadRunningState,
            GetRunningStateForNumRuns(MaxRuns));

    // The previous state was the running state with zero runs remaining: signal the event.
    if (previousState == GetRunningStateForNumRuns(0))
    {
        s_runGateThreadEvent.Set();
        return;
    }

    // The running bit was not set in the previous state: create the gate thread.
    if ((previousState & GateThreadRunningMask) == 0)
    {
        CreateGateThread(threadPoolInstance);
    }
}

private static int GetRunningStateForNumRuns(int numRuns)
Expand All @@ -143,16 +145,29 @@ private static int GetRunningStateForNumRuns(int numRuns)
return GateThreadRunningMask | numRuns;
}

private static void CreateGateThread()
[MethodImpl(MethodImplOptions.NoInlining)]
private static void CreateGateThread(PortableThreadPool threadPoolInstance)
{
Thread gateThread = new Thread(GateThreadStart);
gateThread.IsThreadPoolThread = true;
gateThread.IsBackground = true;
gateThread.Name = ".NET ThreadPool Gate";
gateThread.Start();
bool created = false;
try
{
Thread gateThread = new Thread(GateThreadStart);
gateThread.IsThreadPoolThread = true;
gateThread.IsBackground = true;
gateThread.Name = ".NET ThreadPool Gate";
gateThread.Start();
created = true;
}
finally
{
if (!created)
{
Interlocked.Exchange(ref threadPoolInstance._separated.gateThreadRunningState, 0);
}
}
}
}

internal static void EnsureGateThreadRunning() => GateThread.EnsureRunning();
// Forwards to GateThread.EnsureRunning, passing the current ThreadPoolInstance.
internal static void EnsureGateThreadRunning()
{
    GateThread.EnsureRunning(ThreadPoolInstance);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,8 @@ public HillClimbing(int wavePeriod, int maxWaveMagnitude, double waveMagnitudeMu
//
// If the result was positive, and CPU is > 95%, refuse the move.
//
if (move > 0.0 && ThreadPoolInstance._cpuUtilization > CpuUtilizationHigh)
PortableThreadPool threadPoolInstance = ThreadPoolInstance;
if (move > 0.0 && threadPoolInstance._cpuUtilization > CpuUtilizationHigh)
move = 0.0;

//
Expand All @@ -339,8 +340,8 @@ public HillClimbing(int wavePeriod, int maxWaveMagnitude, double waveMagnitudeMu
//
// Make sure our control setting is within the ThreadPool's limits
//
int maxThreads = ThreadPoolInstance._maxThreads;
int minThreads = ThreadPoolInstance._minThreads;
int maxThreads = threadPoolInstance._maxThreads;
int minThreads = threadPoolInstance._minThreads;

_currentControlSetting = Math.Min(maxThreads - newThreadWaveMagnitude, _currentControlSetting);
_currentControlSetting = Math.Max(minThreads, _currentControlSetting);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ private void WaitThreadStart()
/// </summary>
private void ProcessRemovals()
{
ThreadPoolInstance._waitThreadLock.Acquire();
PortableThreadPool threadPoolInstance = ThreadPoolInstance;
threadPoolInstance._waitThreadLock.Acquire();
try
{
Debug.Assert(_numPendingRemoves >= 0);
Expand Down Expand Up @@ -322,7 +323,7 @@ private void ProcessRemovals()
}
finally
{
ThreadPoolInstance._waitThreadLock.Release();
threadPoolInstance._waitThreadLock.Release();
}
}

Expand Down Expand Up @@ -398,7 +399,8 @@ private void UnregisterWait(RegisteredWaitHandle handle, bool blocking)
{
bool pendingRemoval = false;
// TODO: Optimization: Try to unregister wait directly if it isn't being waited on.
ThreadPoolInstance._waitThreadLock.Acquire();
PortableThreadPool threadPoolInstance = ThreadPoolInstance;
threadPoolInstance._waitThreadLock.Acquire();
try
{
// If this handle is not already pending removal and hasn't already been removed
Expand All @@ -411,7 +413,7 @@ private void UnregisterWait(RegisteredWaitHandle handle, bool blocking)
}
finally
{
ThreadPoolInstance._waitThreadLock.Release();
threadPoolInstance._waitThreadLock.Release();
}

if (blocking)
Expand Down
Loading