Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Tweak scalar paths
  • Loading branch information
MihaZupan committed Nov 20, 2022
commit 5b648d1185a00b52a9156bb4c904a0ecf375e30b
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,8 @@
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\IndexOfAnyValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\IndexOfAnyValues.T.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\IndexOfAnyValuesDebugView.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\IndexOfEmptyValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\IndexOfAnyValuesInRange.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\IndexOfEmptyValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOfAnyValues\ProbabilisticMap.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\IndexOutOfRangeException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\InsufficientExecutionStackException.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,20 @@ public void Set(int c)

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly bool Contains128(char c) =>
c < 128 && Contains((byte)c);
c < 128 && ContainsUnchecked(c);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly bool Contains256(char c) =>
c < 256 && Contains((byte)c);
c < 256 && ContainsUnchecked(c);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly bool Contains(byte b)
public readonly bool Contains(byte b) =>
ContainsUnchecked(b);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private readonly bool ContainsUnchecked(int b)
{
Debug.Assert(b < 256);
uint offset = (uint)(b >> 5);
uint significantBit = 1u << (b & 31);
return (_values[offset] & significantBit) != 0;
Expand All @@ -40,7 +45,7 @@ public readonly char[] GetCharValues()
var chars = new List<char>();
for (int i = 0; i < 256; i++)
{
if (Contains((byte)i))
if (ContainsUnchecked(i))
{
chars.Add((char)i);
}
Expand All @@ -53,7 +58,7 @@ public readonly byte[] GetByteValues()
var bytes = new List<byte>();
for (int i = 0; i < 256; i++)
{
if (Contains((byte)i))
if (ContainsUnchecked(i))
{
bytes.Add((byte)i);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,18 @@ private int LastIndexOfAny<TNegator>(ref byte searchSpace, int searchSpaceLength
private int IndexOfAnyScalar<TNegator>(ref byte searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
for (int i = 0; i < searchSpaceLength; i++)
ref byte searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref byte cur = ref searchSpace;

while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
if (TNegator.NegateIfNeeded(_lookup.Contains(Unsafe.Add(ref searchSpace, i))))
byte b = cur;
if (TNegator.NegateIfNeeded(_lookup.Contains(b)))
{
return i;
return (int)Unsafe.ByteOffset(ref searchSpace, ref cur);
}

cur = ref Unsafe.Add(ref cur, 1);
}

return -1;
Expand All @@ -74,7 +80,8 @@ private int LastIndexOfAnyScalar<TNegator>(ref byte searchSpace, int searchSpace
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
{
if (TNegator.NegateIfNeeded(_lookup.Contains(Unsafe.Add(ref searchSpace, i))))
byte b = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(_lookup.Contains(b)))
{
return i;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,18 @@ private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength
private int IndexOfAnyScalar<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
for (int i = 0; i < searchSpaceLength; i++)
ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref char cur = ref searchSpace;

while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
if (TNegator.NegateIfNeeded(_lookup.Contains128(Unsafe.Add(ref searchSpace, i))))
char c = cur;
if (TNegator.NegateIfNeeded(_lookup.Contains128(c)))
{
return i;
return (int)(Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
}

cur = ref Unsafe.Add(ref cur, 1);
}

return -1;
Expand All @@ -74,7 +80,8 @@ private int LastIndexOfAnyScalar<TNegator>(ref char searchSpace, int searchSpace
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
{
if (TNegator.NegateIfNeeded(_lookup.Contains128(Unsafe.Add(ref searchSpace, i))))
char c = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(_lookup.Contains128(c)))
{
return i;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,18 @@ private int LastIndexOfAny<TNegator>(ref byte searchSpace, int searchSpaceLength
private int IndexOfAnyScalar<TNegator>(ref byte searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
for (int i = 0; i < searchSpaceLength; i++)
ref byte searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref byte cur = ref searchSpace;

while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
if (TNegator.NegateIfNeeded(_lookup.Contains(Unsafe.Add(ref searchSpace, i))))
byte b = cur;
if (TNegator.NegateIfNeeded(_lookup.Contains(b)))
{
return i;
return (int)(Unsafe.ByteOffset(ref searchSpace, ref cur));
}

cur = ref Unsafe.Add(ref cur, 1);
}

return -1;
Expand All @@ -71,7 +77,8 @@ private int LastIndexOfAnyScalar<TNegator>(ref byte searchSpace, int searchSpace
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
{
if (TNegator.NegateIfNeeded(_lookup.Contains(Unsafe.Add(ref searchSpace, i))))
byte b = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(_lookup.Contains(b)))
{
return i;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,15 @@

namespace System.Buffers
{
internal sealed class IndexOfAnyCharValuesProbabilistic<TContains> : IndexOfAnyValues<char>
where TContains : struct, IndexOfAnyValues.IStringContains
internal sealed class IndexOfAnyCharValuesProbabilistic : IndexOfAnyValues<char>
{
private readonly ProbabilisticMap _map;
private ProbabilisticMap _map;
private readonly string _values;

public unsafe IndexOfAnyCharValuesProbabilistic(ReadOnlySpan<char> values)
{
_values = new string(values);

ProbabilisticMap map = default;
ProbabilisticMap.Initialize((uint*)&map, _values);
_map = map;
_map = new ProbabilisticMap(_values);
}

internal override char[] GetValues() => _values.ToCharArray();
Expand All @@ -39,44 +35,14 @@ internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
LastIndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length);

[MethodImpl(MethodImplOptions.NoInlining)]
private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
string values = _values;

for (int i = 0; i < searchSpaceLength; i++)
{
int ch = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(
_map.IsCharBitSet((byte)ch) &&
_map.IsCharBitSet((byte)(ch >> 8)) &&
TContains.Contains(values, (char)ch)))
{
return i;
}
}

return -1;
}
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator =>
ProbabilisticMap.IndexOfAny<TNegator>(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref searchSpace, searchSpaceLength, _values);

[MethodImpl(MethodImplOptions.NoInlining)]
private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
string values = _values;

for (int i = searchSpaceLength - 1; i >= 0; i--)
{
int ch = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(
_map.IsCharBitSet((byte)ch) &&
_map.IsCharBitSet((byte)(ch >> 8)) &&
TContains.Contains(values, (char)ch)))
{
return i;
}
}

return -1;
}
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator =>
ProbabilisticMap.LastIndexOfAny<TNegator>(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref searchSpace, searchSpaceLength, _values);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,18 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
for (int i = 0; i < searchSpaceLength; i++)
ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref char cur = ref searchSpace;

while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
if (TNegator.NegateIfNeeded(_lookup.Contains256(Unsafe.Add(ref searchSpace, i))))
char c = cur;
if (TNegator.NegateIfNeeded(_lookup.Contains256(c)))
{
return i;
return (int)(Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
}

cur = ref Unsafe.Add(ref cur, 1);
}

return -1;
Expand All @@ -61,7 +67,8 @@ private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
{
if (TNegator.NegateIfNeeded(_lookup.Contains256(Unsafe.Add(ref searchSpace, i))))
char c = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(_lookup.Contains256(c)))
{
return i;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),
return new IndexOfAnyLatin1CharValues(values);
}

return values.Length < Vector128<short>.Count
? new IndexOfAnyCharValuesProbabilistic<ShortLoopContains>(values)
: new IndexOfAnyCharValuesProbabilistic<StringContains>(values);
return new IndexOfAnyCharValuesProbabilistic(values);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still a little concerned about the size impact this method is going to have. Any use of IndexOfAnyValues.Create is going to root all possible implementations (IndexOfEmptyValues, IndexOfAny1Value, IndexOfAny2Values, IndexOfAny3Values, IndexOfAny4Values, IndexOfAny5Values, IndexOfAnyLatin1CharValues, IndexOfAnyCharValuesProbabilistic, and IndexOfAnyValuesInRange), regardless of which is actually used. Basically we're adding a choke point.

I don't have a better answer, unless we want to limit what this API produces, such that we don't special-case APIs that already have a direct public entrypoint (IndexOf0/1/2/3, IndexOfRange).

Let's go with it for now, but keep an eye on it. It'd also be helpful in this regard for us to either in this PR or immediately after follow it up with using these APIs everywhere they're applicable and see what kind of impact it actually has on our size benchmarks, and then what we can do about it. For example, maybe there are ways to share most of the code associated with the vectorization of the individual algorithms, such that each doesn't bring in nearly as much as it does today (Adam and I had spoken about an approach where we'd parameterize the algorithms with a generic struct that provided the setup and comparisons as methods that the driver could then call appropriately as part of loops, unrolled call sites, etc.) For use within corelib, we might also want to make some of these helpers internal and allow those uses to bypass the public Create in order to directly instantiate the needed type. If things still end up being bad, we could consider replacing the general Create with more specialized ones focused on known characteristics of the data. Worst case, we could also employ some additional linker switches to remove various code paths if we want to trade off optimal speed for size.

All that said, I still really like the simplicity of the design we currently have, and that it affords us the ability to pick the best implementation given the supplied data.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still a little concerned about the size impact this method is going to have

This PR adds 50kB to System.Private.CoreLib.dll with R2R. It is not the end of the world given how much we add to the product in each release.

we'd parameterize the algorithms with a generic struct

Note that this only helps with IL size. It does not help with native code size. Also, the generic structs cost some startup time and memory at runtime.

We may want to look into whether the factory can be interpreted at compile time by the native aot compiler. cc @MichalStrehovsky

Copy link
Member Author

@MihaZupan MihaZupan Nov 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's go with it for now, but keep an eye on it. It'd also be helpful in this regard for us to either in this PR or immediately after follow it up with using these APIs everywhere they're applicable and see what kind of impact it actually has on our size benchmarks

👍 already working on that.
FWIW, in quite a few places so far I've been able to delete substantial chunks of code with this API, so it'd be pretty cool if this was actually a net size improvement in the long run.

Over a bunch of places across runtime where I've used this API locally, I've never used it for the "(IndexOf0/1/2/3, IndexOfRange)" set you called out. Likewise, for cases with 4/5 values, I've never used it if I knew that it would just defer to the existing 4/5 value implementation that could be reached via IndexOfAny(const).
Ignoring cases where the underlying hardware doesn't support these algorithms, we're really just using Ascii in the vast majority of cases from within runtime. Regex can also make similar decisions and avoid using this API for cases that wouldn't benefit.

That said, it's still nice that this API gives you an optimal implementation even if you aren't as concerned about startup/size costs and just want to use the same API for everything without requiring you to be aware of internal implementation details.

Adam and I had spoken about an approach where we'd parameterize the algorithms with a generic struct that provided the setup and comparisons as methods that the driver could then call appropriately as part of loops, unrolled call sites, etc.

I'm curious about what this would look like.

Copy link
Member

@stephentoub stephentoub Nov 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR adds 50kB to System.Private.CoreLib.dll with R2R. It is not the end of the world given how much we add to the product in each release.

Yeah, my concern isn't the all-up size of corelib, rather the impact on a small trimmed app. But your lack of concern lowers my concern :)

We may want to look into whether the factory can be interpreted at compile time by the native aot compiler

👍

Copy link
Member

@stephentoub stephentoub Nov 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so it'd be pretty cool if this was actually a net size improvement.

It would be.

I've never used it for the "(IndexOf0/1/2/3, IndexOfRange)" set you called out

Right. Which begs the question of whether it's actually worth including those, since whether we use them or not, at present all that code is going to be kept with the current trimming.

Regex can also make similar decisions and avoid using this API for cases that wouldn't benefit.

Yes, my intention is that the source generator and compiler only use this for cases where there isn't currently a better direct API for it. In large part that's to help with readability, but it'll also help reduce startup overheads.

I'm curious about what this would look like.

The rough strawman, which we never actually tried, was something along the lines of (very rough pseudo code)

internal interface IIndexOfComparer<T>
{
    bool IsMatch(T item);
    bool IsMatch(Vector128<T> items);
    bool IsMatch(Vector256<T> items);
}

then with a shared driver like:

internal int IndexOfCore<T, TComparer>(ReadOnlySpan<T> span, TComparer comparer) where TComparer : IIndexOfComparer<T>
{
    if (!Vector128.IsHardwareAccelerated || span.Length < Vector128<T>.Count)
    {
        // TODO: Unroll for short spans
        for (int i = 0; i < span.Length; i++)
            if (comparer.IsMatch(span[i]))
                return i;
    }
    else if (!Vector256.IsHardwareAccelerated || span.Length < Vector256<T>.Count)
    {
        ...
        if (comparer.IsMatch(currentVector))
            return FindIndex(currentVector);
        ...
    }
    else
    {
        ... // same for Vector256
    }
    return -1;
}

and then use like:

public static int IndexOfAny(ReadOnlySpan<char> span, char value0, char value1) =>
    IndexOfCore(span, new IndexOfAny2<char>(value0, value1));

private readonly struct IndexOfAny2<T> : IIndexOfComparer<T>
{
    private readonly T _value1, _value2;
    private readonly Vector128<T> _vector128_1, _vector128_2;
    private readonly Vector256<T> _vector256_1, _vector256_2;

    public IndexOfAny2(T value1, T value2)
    {
        _value1 = value1;
        _value2 = value2;
        _vector128_1 = Vector128.Create(value1);
        _vector128_2 = Vector128.Create(value2);
        ...
    }

    public bool IsMatch(T value) => value == _value1 || value == _value2;
    public bool IsMatch(Vector128<T> values) => values == _vector128_1 || values == _vector128_2;
    ...
}

etc. It'd end up looking similar in structure to what you currently have in this PR, actually, just at a lower level.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to look into whether the factory can be interpreted at compile time by the native aot compiler. cc @MichalStrehovsky

You mean to interpret the Create method this comment is on ahead of time depending on the callsite?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IndexOfAnyValues.Create is expected to be often used in static constructors with constant input. For example, like this: https://github.com/dotnet/runtime/pull/78666/files#diff-2599fdb4dd17bc235b019eb03aed3a26260765050d1e48419bc3e44319ecb147R31-R32


private static IndexOfAnyValues<T>? TryGetSingleRange<T>(ReadOnlySpan<T> values, out T maxInclusive)
Expand Down Expand Up @@ -169,32 +167,5 @@ ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),

return (IndexOfAnyValues<T>)(object)new IndexOfAnyValuesInRange<T>(min, max);
}

internal interface IStringContains
{
public static abstract bool Contains(string s, char value);
}

private readonly struct StringContains : IStringContains
{
public static bool Contains(string s, char value) => s.Contains(value);
}

private readonly struct ShortLoopContains : IStringContains
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Contains(string s, char value)
{
foreach (char c in s)
{
if (value == c)
{
return true;
}
}

return false;
}
}
}
}
Loading