Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;

namespace System.Collections.Immutable
Expand Down Expand Up @@ -575,17 +577,9 @@ public ImmutableArray<T> RemoveRange(IEnumerable<T> items, IEqualityComparer<T>?
self.ThrowNullRefIfNotInitialized();
Requires.NotNull(items, nameof(items));

var indicesToRemove = new SortedSet<int>();
foreach (T item in items)
{
int index = -1;
do
{
index = self.IndexOf(item, index + 1, equalityComparer);
} while (index >= 0 && !indicesToRemove.Add(index) && index < self.Length - 1);
}

return self.RemoveAtRange(indicesToRemove);
// Avoid to allocate for true enumerable
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Avoid to allocate for true enumerable
// Avoid building a multiset for enumerables of unknown size.

ICollection<int>? indicesToRemove = items.TryGetCount(out _) ? self.FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer) : self.FindOrderedIndicesToRemove(items, equalityComparer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand why we need to fall back to the old algorithm if we can't determine the size?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the previous comment (#61196).
Here we use TryGetCount to detect whether the input is a materialized collection rather than a lazily produced enumerable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment explaining this? What about collections with very large counts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment explaining this?

Added.

What about collections with very large counts?

I'm not sure. My feeling is that the fallback code targets true (streaming) enumerables, where the total size is unknown up front. If the input is already a large, materialized collection, then an allocation of the same order of magnitude has already succeeded, so building the multiset should be no worse. Please advise.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One possible approach might be to fall back to the slow implementation once the multiset exceeds a certain capacity dynamically:

        private ICollection<int>? FindOrderedIndicesToRemoveByMultiSet(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
        {
            var multiSet = new MultiSet(equalityComparer);
            int i = 0;
            foreach (T item in items)
            {
                multiSet.Add(item);
                if (++i == 100) // threshold way above what I would expect is the typical input enumerable.
                {
                      return FindOrderedIndicesToRemove(items, equalityComparer);
                }
            }

            return GetOrderedIndicesToRemoveFor(multiSet);
        }

I would strongly encourage you to make any decision based on benchmark numbers, however.

Copy link
Contributor Author

@lateapexearlyspeed lateapexearlyspeed Mar 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

          // Not check size
           public ICollection<int>? Fast_NoLimit_FindOrderedIndicesToRemoveByMultiSet(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
            {
                var multiSet = new MultiSet(equalityComparer);
                foreach (T item in items)
                {
                    multiSet.Add(item);
                }

                return GetOrderedIndicesToRemoveFor(multiSet);
            }

          // Firstly try to get collection count property directly then fall back to iterate to trigger size limit dynamically
           public ICollection<int>? Fast_TryGetCountThenIterateCheckCount(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
            {
                if (items.TryGetCount(out int count))
                {
                    if (count <= ThrottleValue)
                    {
                        return Fast_NoLimit_FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer);
                    }

                    return SlowFindOrderedIndicesToRemove(items, equalityComparer);
                }

                return Fast_IterateCheckCount(items, equalityComparer);
            }

          // Firstly try to get collection count property directly then fall back to iterate to trigger size limit compared with actual dictionary.count (dedup same value purpose) dynamically
            public ICollection<int>? Fast_TryGetCountThenIterateCheckCountByDicLength(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
            {
                if (items.TryGetCount(out int count))
                {
                    if (count <= ThrottleValue)
                    {
                        return Fast_NoLimit_FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer);
                    }

                    return SlowFindOrderedIndicesToRemove(items, equalityComparer);
                }

                return Fast_IterateCheckCountByDicLength(items, equalityComparer);
            }

          // Iterate to trigger size limit compared with actual dictionary.count (dedup same value purpose) dynamically
            public ICollection<int>? Fast_IterateCheckCountByDicLength(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
            {
                var multiSet = new MultiSet(equalityComparer);
                foreach (T item in items)
                {
                    if (multiSet.ItemCount > ThrottleValue)
                    {
                        return SlowFindOrderedIndicesToRemove(items, equalityComparer);
                    }
                    multiSet.Add(item);
                }

                return GetOrderedIndicesToRemoveFor(multiSet);
            }

          // Iterate to trigger size limit dynamically
            public ICollection<int>? Fast_IterateCheckCount(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
            {
                var multiSet = new MultiSet(equalityComparer);
                int i = 0;
                foreach (T item in items)
                {
                    if (++i > ThrottleValue)
                    {
                        return SlowFindOrderedIndicesToRemove(items, equalityComparer);
                    }
                    multiSet.Add(item);
                }

                return GetOrderedIndicesToRemoveFor(multiSet);
            }

           private readonly struct MultiSet
           {
                public int ItemCount => _dictionary.Count;
           }

Benchmark condition:
ImmutableArray length = 100, Items length = 100

  1. Items is true enumerable
    image
  2. Items is array instance
    image

Result:
Pure fast method is ~1x faster than slow method
If not hit throttleValue, pure fast is almost same time as all other fast methods with throttling check
If hit throttleValue so that fall back to slow path, then all fast methods with throttling check are much slower than original slow method, except "Fast_TryGetCountThenIterateCheckCount()" and "Fast_TryGetCountThenIterateCheckCountByDicLength()" when items is Array instance because they can early find situation to fall back to slow path. However they are still slower than original slow method.

So if we need a size limit, Fast_TryGetCountThenIterateCheckCountByDicLength() is probably the better option, but I'm not sure what throttle value is best: the LOH threshold is 85 KB, but we would need to work out when that is actually reached, since Dictionary grows its internal arrays ahead of time (EnsureCapacity). Guidance appreciated, thanks.
Otherwise, pure fast method can give shortest time in all cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I think Fast_TryGetCountThenIterateCheckCountByDicLength is ok, although I would be interested in seeing the numbers for various sizes of input enumerables, particularly for low values like 1, 2 or 10 elements. It might be the case that for low enough values creating a MultiSet actually makes things slower. You can execute benchmarks for multiple sizes using BDN's ParamsAttribute.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under condition of 100 length ImmutableArray, fast method is slower until items length up to ~30.
image
image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, these numbers make me think that this optimization is likely not worth our while. I don't think it's worth optimizing for inputs > 30, it's unlikely that this method would be used with so large inputs. I'd probably just close this PR. Thanks for driving the performance investigation!

return indicesToRemove == null ? self : self.RemoveAtRange(indicesToRemove);
}

/// <summary>
Expand Down Expand Up @@ -881,17 +875,9 @@ public ImmutableArray<T> RemoveRange(ReadOnlySpan<T> items, IEqualityComparer<T>
return self.Remove(items[0], equalityComparer);
}

var indicesToRemove = new SortedSet<int>();
foreach (T item in items)
{
int index = -1;
do
{
index = self.IndexOf(item, index + 1, equalityComparer);
} while (index >= 0 && !indicesToRemove.Add(index) && index < self.Length - 1);
}
ICollection<int>? indicesToRemove = self.FindOrderedIndicesToRemoveByMultiSet(items, equalityComparer);

return self.RemoveAtRange(indicesToRemove);
return indicesToRemove == null ? self : self.RemoveAtRange(indicesToRemove);
}

/// <summary>
Expand Down Expand Up @@ -1398,5 +1384,131 @@ private ImmutableArray<T> InsertSpanRangeInternal(int index, ReadOnlySpan<T> ite

return new ImmutableArray<T>(tmp);
}

/// <summary>
/// Tallies <paramref name="items"/> into a multiset and returns the ascending indices of
/// this array's elements to remove, or <see langword="null"/> when nothing matches.
/// </summary>
/// <param name="items">The sequence of values whose occurrences should be removed.</param>
/// <param name="equalityComparer">The comparer used to match elements; <see langword="null"/> means the default comparer.</param>
private ICollection<int>? FindOrderedIndicesToRemoveByMultiSet(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
    // Count every requested removal first; matching against the array happens afterwards.
    var removalCounts = new MultiSet(equalityComparer);
    foreach (T candidate in items)
    {
        removalCounts.Add(candidate);
    }

    return GetOrderedIndicesToRemoveFor(removalCounts);
}

/// <summary>
/// Span overload: tallies <paramref name="items"/> into a multiset and returns the ascending
/// indices of this array's elements to remove, or <see langword="null"/> when nothing matches.
/// </summary>
/// <param name="items">The span of values whose occurrences should be removed.</param>
/// <param name="equalityComparer">The comparer used to match elements; <see langword="null"/> means the default comparer.</param>
private ICollection<int>? FindOrderedIndicesToRemoveByMultiSet(ReadOnlySpan<T> items, IEqualityComparer<T>? equalityComparer)
{
    var removalCounts = new MultiSet(equalityComparer);

    // Indexed access into a ReadOnlySpan yields a readonly reference, so no element copies are made.
    for (int i = 0; i < items.Length; i++)
    {
        removalCounts.Add(items[i]);
    }

    return GetOrderedIndicesToRemoveFor(removalCounts);
}

/// <summary>
/// Scans the backing array from front to back, consuming one multiset occurrence per match,
/// and collects the matching positions in ascending order.
/// </summary>
/// <param name="multiSet">Occurrence counts of the values to remove.</param>
/// <returns>The ordered indices to remove, or <see langword="null"/> when no element matched.</returns>
private ICollection<int>? GetOrderedIndicesToRemoveFor(MultiSet multiSet)
{
    // Lazily allocated so the common "nothing to remove" case allocates nothing.
    List<int>? result = null;
    for (int position = 0; position < array!.Length; position++)
    {
        if (!multiSet.TryRemove(array[position]))
        {
            continue;
        }

        result ??= new List<int>();
        result.Add(position);
    }

    return result;
}

/// <summary>
/// Fallback search: for each requested item, locates the next matching index in this array
/// that has not already been claimed by an earlier duplicate.
/// </summary>
/// <param name="items">The sequence of values whose occurrences should be removed.</param>
/// <param name="equalityComparer">The comparer used to match elements; <see langword="null"/> means the default comparer.</param>
/// <returns>The (possibly empty) set of indices to remove, already in ascending order.</returns>
private SortedSet<int> FindOrderedIndicesToRemove(IEnumerable<T> items, IEqualityComparer<T>? equalityComparer)
{
    var indices = new SortedSet<int>();
    foreach (T item in items)
    {
        // Walk successive matches of this item; keep searching while the found index was
        // already claimed (Add returned false) and more of the array remains to scan.
        int match = IndexOf(item, 0, equalityComparer);
        while (match >= 0 && !indices.Add(match) && match < Length - 1)
        {
            match = IndexOf(item, match + 1, equalityComparer);
        }
    }

    return indices;
}

/// <summary>
/// A bag (value -> occurrence count) used to match removal requests against array elements.
/// Keys are wrapped in <see cref="NullableKeyWrapper"/> so that null items can be counted,
/// because <see cref="Dictionary{TKey, TValue}"/> rejects null keys.
/// </summary>
private readonly struct MultiSet
{
    private readonly Dictionary<NullableKeyWrapper, int> _dictionary;

    public MultiSet(IEqualityComparer<T>? equalityComparer)
    {
        _dictionary = new Dictionary<NullableKeyWrapper, int>(new NullableKeyWrapperEqualityComparer(equalityComparer));
    }

    /// <summary>Records one occurrence of <paramref name="item"/>.</summary>
    public void Add(T? item)
    {
#if NET6_0_OR_GREATER
        // Single hash lookup: fetch (or create) the counter slot and bump it in place.
        ref int count = ref CollectionsMarshal.GetValueRefOrAddDefault(_dictionary, item, out _);
        checked { count++; } // guard against silent int overflow on pathological inputs
#else
        _dictionary[item] = _dictionary.TryGetValue(item, out int count) ? checked(count + 1) : 1;
#endif
    }

    /// <summary>
    /// Consumes one occurrence of <paramref name="item"/> if any remain.
    /// </summary>
    /// <returns><see langword="true"/> when an occurrence was consumed; otherwise <see langword="false"/>.</returns>
    public bool TryRemove(T? item)
    {
#if NET6_0_OR_GREATER
        ref int count = ref CollectionsMarshal.GetValueRefOrNullRef(_dictionary, item);
        if (Unsafe.IsNullRef(ref count) || count == 0)
#else
        if (!_dictionary.TryGetValue(item, out int count) || count == 0)
#endif
        {
            return false;
        }

#if NET6_0_OR_GREATER
        count--; // writes through the ref, decrementing the stored count directly
#else
        _dictionary[item] = count - 1;
#endif
        return true;
    }

    /// <summary>Value wrapper that makes null usable as a dictionary key.</summary>
    private readonly struct NullableKeyWrapper
    {
        public readonly T? Key;

        public static implicit operator NullableKeyWrapper(T? key)
        {
            return new NullableKeyWrapper(key);
        }

        private NullableKeyWrapper(T? key)
        {
            Key = key;
        }
    }

    /// <summary>
    /// Projects an element comparer onto wrapped keys; a null comparer means
    /// <see cref="EqualityComparer{T}.Default"/>.
    /// </summary>
    private class NullableKeyWrapperEqualityComparer : IEqualityComparer<NullableKeyWrapper>
    {
        private readonly IEqualityComparer<T>? _keyComparer;

        public NullableKeyWrapperEqualityComparer(IEqualityComparer<T>? keyComparer)
        {
            _keyComparer = keyComparer;
        }

        public int GetHashCode(NullableKeyWrapper obj)
        {
            return _keyComparer is null ? EqualityComparer<T>.Default.GetHashCode(obj.Key!) : _keyComparer.GetHashCode(obj.Key!);
        }

        public bool Equals(NullableKeyWrapper x, NullableKeyWrapper y)
        {
            return _keyComparer is null ? EqualityComparer<T>.Default.Equals(x.Key, y.Key) : _keyComparer.Equals(x.Key, y.Key);
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1509,8 +1509,10 @@ public static IEnumerable<object[]> RemoveRangeEnumerableData()
new[]
{
new object[] { Array.Empty<int?>(), Array.Empty<int?>(), comparer },
new object[] { Array.Empty<int?>(), Enumerable.Empty<int?>(), comparer },
new object[] { Array.Empty<int?>(), new int?[] { 1 }, comparer },
new object[] { new int?[] { 1 }, Array.Empty<int?>(), comparer },
new object[] { new int?[] { 1 }, Enumerable.Empty<int?>(), comparer },
new object[] { new int?[] { 1, 2, 3 }, new int?[] { 2, 3, 4 }, comparer },
new object[] { Enumerable.Range(1, 5).Cast<int?>(), Enumerable.Range(6, 5).Cast<int?>(), comparer },
new object[] { new int?[] { 1, 2, 3 }, new int?[] { 2 }, comparer },
Expand All @@ -1521,14 +1523,19 @@ public static IEnumerable<object[]> RemoveRangeEnumerableData()
new object[] { new int?[] { 1, 2, 2, 3 }, new int?[] { 2 }, comparer },
new object[] { new int?[] { 1, 2, 2, 3 }, new int?[] { 2, 2 }, comparer },
new object[] { new int?[] { 1, 2, 2, 3 }, new int?[] { 2, 2, 2 }, comparer },
new object[] { new int?[] { 1, 2, 2, 3 }, Enumerable.Repeat<int?>(2, 3), comparer },
new object[] { new int?[] { 1, 2, 3 }, new int?[] { 42 }, comparer },
new object[] { new int?[] { 1, 2, 3 }, new int?[] { 42, 42 }, comparer },
new object[] { new int?[] { 1, 2, 3 }, new int?[] { 42, 42, 42 }, comparer },
new object[] { new int?[] { null }, new int?[] { 1 }, comparer },
new object[] { new int?[] { null }, Enumerable.Repeat<int?>(1, 1), comparer },
new object[] { new int?[] { 1 }, new int?[] { null}, comparer },
new object[] { new int?[] { 1 }, Enumerable.Repeat<int?>(null, 1), comparer },
new object[] { new int?[] { 1, null, 2, null }, new int?[] { 1, null}, comparer },
new object[] { new int?[] { 1, null, 2 }, new int?[] { 1, null, null}, comparer },
new object[] { new int?[] { 1, null, 2 }, Enumerable.Repeat<int?>(1, 1).Concat(Enumerable.Repeat<int?>(null, 2)), comparer },
new object[] { new int?[] { 1, null, 2, null }, new int?[] { 1, null, null}, comparer },
new object[] { new int?[] { 1, null, 2, null }, Enumerable.Repeat<int?>(1, 1).Concat(Enumerable.Repeat<int?>(null, 2)), comparer }
});
}

Expand Down Expand Up @@ -2623,8 +2630,8 @@ private static IEnumerable<IEqualityComparer<T>> SharedEqualityComparers<T>()
// Null comparers should be accepted and translated to the default comparer.
yield return null;
yield return EqualityComparer<T>.Default;
yield return new DelegateEqualityComparer<T>(equals: (x, y) => true, objectGetHashCode: obj => 0);
yield return new DelegateEqualityComparer<T>(equals: (x, y) => false, objectGetHashCode: obj => 0);
yield return new DelegateEqualityComparer<T>(equals: (x, y) => true, objectGetHashCode: obj => 0, getHashCode: obj => 0);
yield return new DelegateEqualityComparer<T>(equals: (x, y) => false, objectGetHashCode: obj => 0, getHashCode: obj => 0);
}

/// <summary>
Expand Down