-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Improve ImmutableArray.RemoveRange #65186
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c8e6676
b00ba1c
62cb66b
0cfee91
de7c2c9
c5534ef
683f7b1
9624f8a
c85cde1
f526fed
757fdef
b703751
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -6,6 +6,8 @@ | |||||
| using System.Diagnostics.CodeAnalysis; | ||||||
| using System.Globalization; | ||||||
| using System.Linq; | ||||||
| using System.Runtime.CompilerServices; | ||||||
| using System.Runtime.InteropServices; | ||||||
| using System.Runtime.Versioning; | ||||||
|
|
||||||
| namespace System.Collections.Immutable | ||||||
|
|
@@ -575,17 +577,9 @@ public ImmutableArray<T> RemoveRange(IEnumerable<T> items, IEqualityComparer<T>? | |||||
| self.ThrowNullRefIfNotInitialized(); | ||||||
| Requires.NotNull(items, nameof(items)); | ||||||
|
|
||||||
| var indicesToRemove = new SortedSet<int>(); | ||||||
| foreach (T item in items) | ||||||
| { | ||||||
| int index = -1; | ||||||
| do | ||||||
| { | ||||||
| index = self.IndexOf(item, index + 1, equalityComparer); | ||||||
| } while (index >= 0 && !indicesToRemove.Add(index) && index < self.Length - 1); | ||||||
| } | ||||||
|
|
||||||
| return self.RemoveAtRange(indicesToRemove); | ||||||
| // Avoid to allocate for true enumerable | ||||||
| ICollection<int>? indicesToRemove = items.TryGetCount(out _) ? self.FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer) : self.FindOrderedIndicesToRemove(items, equalityComparer); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand why we need to fall back to the old algorithm if we can't determine the size?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See previous comment(61196)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add a comment explaining this? What about collections with very large counts?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Added.
Not sure, I just feel the fall back code is for true enumerable data which sounds like long
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One possible approach might be to fall back to the slow implementation once the multiset exceeds a certain capacity dynamically: private ICollection<int>? FindOrderedIndicesToRemoveByMultiSet(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
var multiSet = new MultiSet(equalityComparer);
int i = 0;
foreach (T item in items)
{
multiSet.Add(item);
if (++i == 100) // threshold way above what I would expect is the typical input enumerable.
{
return FindOrderedIndicesToRemove(items, equalityComparer);
}
}
return GetOrderedIndicesToRemoveFor(multiSet);
}I would strongly encourage you to make any decision based on benchmark numbers, however.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. // Not check size
public ICollection<int>? Fast_NoLimit_FindOrderedIndicesToRemoveByMultiSet(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
var multiSet = new MultiSet(equalityComparer);
foreach (T item in items)
{
multiSet.Add(item);
}
return GetOrderedIndicesToRemoveFor(multiSet);
}
// Firstly try to get collection count property directly then fall back to iterate to trigger size limit dynamically
public ICollection<int>? Fast_TryGetCountThenIterateCheckCount(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
if (items.TryGetCount(out int count))
{
if (count <= ThrottleValue)
{
return Fast_NoLimit_FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer);
}
return SlowFindOrderedIndicesToRemove(items, equalityComparer);
}
return Fast_IterateCheckCount(items, equalityComparer);
}
// Firstly try to get collection count property directly then fall back to iterate to trigger size limit compared with actual dictionary.count (dedup same value purpose) dynamically
public ICollection<int>? Fast_TryGetCountThenIterateCheckCountByDicLength(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
if (items.TryGetCount(out int count))
{
if (count <= ThrottleValue)
{
return Fast_NoLimit_FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer);
}
return SlowFindOrderedIndicesToRemove(items, equalityComparer);
}
return Fast_IterateCheckCountByDicLength(items, equalityComparer);
}
// Iterate to trigger size limit compared with actual dictionary.count (dedup same value purpose) dynamically
public ICollection<int>? Fast_IterateCheckCountByDicLength(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
var multiSet = new MultiSet(equalityComparer);
foreach (T item in items)
{
if (multiSet.ItemCount > ThrottleValue)
{
return SlowFindOrderedIndicesToRemove(items, equalityComparer);
}
multiSet.Add(item);
}
return GetOrderedIndicesToRemoveFor(multiSet);
}
// Iterate to trigger size limit dynamically
public ICollection<int>? Fast_IterateCheckCount(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
var multiSet = new MultiSet(equalityComparer);
int i = 0;
foreach (T item in items)
{
if (++i > ThrottleValue)
{
return SlowFindOrderedIndicesToRemove(items, equalityComparer);
}
multiSet.Add(item);
}
return GetOrderedIndicesToRemoveFor(multiSet);
}
private readonly struct MultiSet
{
public int ItemCount => _dictionary.Count;
}Benchmark condition: Result: So if we need to have size limit, maybe
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I think
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm, these numbers make me think that this optimization is likely not worth our while. I don't think it's worth optimizing for inputs > 30, it's unlikely that this method would be used with so large inputs. I'd probably just close this PR. Thanks for driving the performance investigation! |
||||||
| return indicesToRemove == null ? self : self.RemoveAtRange(indicesToRemove); | ||||||
| } | ||||||
|
|
||||||
| /// <summary> | ||||||
|
|
@@ -881,17 +875,9 @@ public ImmutableArray<T> RemoveRange(ReadOnlySpan<T> items, IEqualityComparer<T> | |||||
| return self.Remove(items[0], equalityComparer); | ||||||
| } | ||||||
|
|
||||||
| var indicesToRemove = new SortedSet<int>(); | ||||||
| foreach (T item in items) | ||||||
| { | ||||||
| int index = -1; | ||||||
| do | ||||||
| { | ||||||
| index = self.IndexOf(item, index + 1, equalityComparer); | ||||||
| } while (index >= 0 && !indicesToRemove.Add(index) && index < self.Length - 1); | ||||||
| } | ||||||
| ICollection<int>? indicesToRemove = self.FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer); | ||||||
|
|
||||||
| return self.RemoveAtRange(indicesToRemove); | ||||||
| return indicesToRemove == null ? self : self.RemoveAtRange(indicesToRemove); | ||||||
| } | ||||||
|
|
||||||
| /// <summary> | ||||||
|
|
@@ -1398,5 +1384,131 @@ private ImmutableArray<T> InsertSpanRangeInternal(int index, ReadOnlySpan<T> ite | |||||
|
|
||||||
| return new ImmutableArray<T>(tmp); | ||||||
| } | ||||||
|
|
||||||
| private ICollection<int>? FindOrderedIndicesToRemoveByMultiSet(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer) | ||||||
| { | ||||||
| var multiSet = new MultiSet(equalityComparer); | ||||||
| foreach (T item in items) | ||||||
| { | ||||||
| multiSet.Add(item); | ||||||
| } | ||||||
|
|
||||||
| return GetOrderedIndicesToRemoveFor(multiSet); | ||||||
| } | ||||||
|
|
||||||
| private ICollection<int>? FindOrderedIndicesToRemoveByMultiSet(ReadOnlySpan<T> items, IEqualityComparer<T>? equalityComparer) | ||||||
| { | ||||||
| var multiSet = new MultiSet(equalityComparer); | ||||||
| foreach (ref readonly T item in items) | ||||||
| { | ||||||
| multiSet.Add(item); | ||||||
| } | ||||||
|
|
||||||
| return GetOrderedIndicesToRemoveFor(multiSet); | ||||||
| } | ||||||
|
|
||||||
| private ICollection<int>? GetOrderedIndicesToRemoveFor(MultiSet multiSet) | ||||||
| { | ||||||
| List<int>? indicesToRemove = null; | ||||||
| for (int i = 0; i < array!.Length; i++) | ||||||
| { | ||||||
| if (multiSet.TryRemove(array[i])) | ||||||
| { | ||||||
| (indicesToRemove ??= new()).Add(i); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| return indicesToRemove; | ||||||
| } | ||||||
|
|
||||||
| private SortedSet<int> FindOrderedIndicesToRemove(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer) | ||||||
| { | ||||||
| var indicesToRemove = new SortedSet<int>(); | ||||||
| foreach (T item in items) | ||||||
| { | ||||||
| int index = -1; | ||||||
| do | ||||||
| { | ||||||
| index = IndexOf(item, index + 1, equalityComparer); | ||||||
| } while (index >= 0 && !indicesToRemove.Add(index) && index < Length - 1); | ||||||
| } | ||||||
|
|
||||||
| return indicesToRemove; | ||||||
| } | ||||||
|
|
||||||
| private readonly struct MultiSet | ||||||
| { | ||||||
| private readonly Dictionary<NullableKeyWrapper, int> _dictionary; | ||||||
|
|
||||||
| public MultiSet(IEqualityComparer<T>? equalityComparer) | ||||||
| { | ||||||
| _dictionary = new Dictionary<NullableKeyWrapper, int>(new NullableKeyWrapperEqualityComparer(equalityComparer)); | ||||||
| } | ||||||
|
|
||||||
| public void Add(T? item) | ||||||
| { | ||||||
| #if NET6_0_OR_GREATER | ||||||
| ref int count = ref CollectionsMarshal.GetValueRefOrAddDefault(_dictionary, item, out _); | ||||||
| count++; | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might need to guard against overflow?
Suggested change
|
||||||
| #else | ||||||
| _dictionary[item] = _dictionary.TryGetValue(item, out int count) ? count + 1 : 1; | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| #endif | ||||||
| } | ||||||
|
|
||||||
| public bool TryRemove(T? item) | ||||||
| { | ||||||
| #if NET6_0_OR_GREATER | ||||||
| ref int count = ref CollectionsMarshal.GetValueRefOrNullRef(_dictionary, item); | ||||||
| if (Unsafe.IsNullRef(ref count) || count == 0) | ||||||
| #else | ||||||
| if (!_dictionary.TryGetValue(item, out int count) || count == 0) | ||||||
| #endif | ||||||
| { | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| #if NET6_0_OR_GREATER | ||||||
| count--; | ||||||
| #else | ||||||
| _dictionary[item] = count - 1; | ||||||
| #endif | ||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| private readonly struct NullableKeyWrapper | ||||||
| { | ||||||
| public readonly T? Key; | ||||||
|
|
||||||
| public static implicit operator NullableKeyWrapper(T? key) | ||||||
| { | ||||||
| return new NullableKeyWrapper(key); | ||||||
| } | ||||||
|
|
||||||
| private NullableKeyWrapper(T? key) | ||||||
| { | ||||||
| Key = key; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| private class NullableKeyWrapperEqualityComparer : IEqualityComparer<NullableKeyWrapper> | ||||||
eiriktsarpalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| { | ||||||
| private readonly IEqualityComparer<T>? _keyComparer; | ||||||
|
|
||||||
| public NullableKeyWrapperEqualityComparer(IEqualityComparer<T>? keyComparer) | ||||||
| { | ||||||
| _keyComparer = keyComparer; | ||||||
| } | ||||||
|
|
||||||
| public int GetHashCode(NullableKeyWrapper obj) | ||||||
| { | ||||||
| return _keyComparer is null ? EqualityComparer<T>.Default.GetHashCode(obj.Key!) : _keyComparer.GetHashCode(obj.Key!); | ||||||
| } | ||||||
|
|
||||||
| public bool Equals(NullableKeyWrapper x, NullableKeyWrapper y) | ||||||
| { | ||||||
| return _keyComparer is null ? EqualityComparer<T>.Default.Equals(x.Key, y.Key) : _keyComparer.Equals(x.Key, y.Key); | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||




There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.