diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs
index 39f95920da31b3..1952a7dcd1e006 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs
@@ -29,9 +29,12 @@ internal static class CharKind
/// Gets the next character kind from a context
internal static uint Next(uint context) => context >> 3;
- /// Creates the context of the previous and the next character kinds.
+ /// Encodes the pair (prevKind, nextKind) using 6 bits
internal static uint Context(uint prevKind, uint nextKind) => (nextKind << 3) | prevKind;
+ /// Exclusive maximum context (limit) is 64 because a context uses bit-shifting where each kind needs 3 bits.
+ internal const int ContextLimit = 64;
+
internal static string DescribePrev(uint i) => i switch
{
StartStop => @"\A",
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
index ac56fd972d0cf1..a8cec12036225a 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -359,9 +359,7 @@ public DfaMatchingState TakeTransition(
Debug.Assert(builder._delta is not null);
int offset = (currentState.Id << builder._mintermsCount) | mintermId;
- return
- builder._delta[offset] ??
- matcher.CreateNewTransition(currentState, minterm, offset);
+ return Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(currentState, minterm, offset);
}
}
@@ -391,7 +389,7 @@ public DfaMatchingState TakeTransition(
DfaMatchingState nextStates = builder.MkState(oneState, currentStates.PrevCharKind);
int offset = (nextStates.Id << builder._mintermsCount) | mintermId;
- DfaMatchingState p = builder._delta[offset] ?? matcher.CreateNewTransition(nextStates, minterm, offset);
+ DfaMatchingState p = Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(nextStates, minterm, offset);
// Observe that if p.Node is an Or it will be flattened.
union = builder.MkOr2(union, p.Node);
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
index 41dfdf3898bf58..bf46baf57c147e 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
@@ -13,6 +13,12 @@ namespace System.Text.RegularExpressions.Symbolic
internal sealed class SymbolicRegexNode where S : notnull
{
internal const string EmptyCharClass = "[]";
+ /// Some byte other than 0 to represent true
+ internal const byte TrueByte = 1;
+ /// Some byte other than 0 to represent false
+ internal const byte FalseByte = 2;
+ /// The undefined value is the default value 0
+ internal const byte UndefinedByte = 0;
internal readonly SymbolicRegexBuilder _builder;
internal readonly SymbolicRegexKind _kind;
@@ -23,7 +29,11 @@ internal sealed class SymbolicRegexNode where S : notnull
internal readonly SymbolicRegexNode? _right;
internal readonly SymbolicRegexSet? _alts;
- private Dictionary? _nullabilityCache;
+ ///
+ /// Caches nullability of this node for any given context (0 <= context < ContextLimit)
+ /// when _info.StartsWithSomeAnchor and _info.CanBeNullable are true. Otherwise the cache is null.
+ ///
+ private byte[]? _nullabilityCache;
private S _startSet;
@@ -50,6 +60,7 @@ private SymbolicRegexNode(SymbolicRegexBuilder builder, SymbolicRegexKind kin
_info = info;
_hashcode = ComputeHashCode();
_startSet = ComputeStartSet();
+ _nullabilityCache = info.StartsWithSomeAnchor && info.CanBeNullable ? new byte[CharKind.ContextLimit] : null;
}
private bool _isInternalizedUnion;
@@ -162,92 +173,100 @@ static void AppendToList(SymbolicRegexNode concat, List>
/// kind info for previous and next characters
internal bool IsNullableFor(uint context)
{
- if (!_info.StartsWithSomeAnchor)
- return IsNullable;
-
- if (!_info.CanBeNullable)
- return false;
+ if (_nullabilityCache is null)
+ {
+ // if _nullabilityCache is null then IsNullable==CanBeNullable
+ // Observe that if IsNullable==true then CanBeNullable==true.
+ // but when the node does not start with an anchor
+ // and IsNullable==false then CanBeNullable==false.
+ return _info.IsNullable;
+ }
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
return StackHelper.CallOnEmptyStack(IsNullableFor, context);
}
- // Initialize the nullability cache for this node.
- _nullabilityCache ??= new Dictionary();
+ Debug.Assert(context < CharKind.ContextLimit);
- if (!_nullabilityCache.TryGetValue(context, out bool is_nullable))
+ // If nullablity has been computed for the given context then return it
+ byte b = Volatile.Read(ref _nullabilityCache[context]);
+ if (b != UndefinedByte)
{
- switch (_kind)
- {
- case SymbolicRegexKind.Loop:
- Debug.Assert(_left is not null);
- is_nullable = _lower == 0 || _left.IsNullableFor(context);
- break;
+ return b == TrueByte;
+ }
- case SymbolicRegexKind.Concat:
- Debug.Assert(_left is not null && _right is not null);
- is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
- break;
+ // Otherwise compute the nullability recursively for the given context
+ bool is_nullable;
+ switch (_kind)
+ {
+ case SymbolicRegexKind.Loop:
+ Debug.Assert(_left is not null);
+ is_nullable = _lower == 0 || _left.IsNullableFor(context);
+ break;
- case SymbolicRegexKind.Or:
- case SymbolicRegexKind.And:
- Debug.Assert(_alts is not null);
- is_nullable = _alts.IsNullableFor(context);
- break;
+ case SymbolicRegexKind.Concat:
+ Debug.Assert(_left is not null && _right is not null);
+ is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
+ break;
- case SymbolicRegexKind.Not:
- Debug.Assert(_left is not null);
- is_nullable = !_left.IsNullableFor(context);
- break;
+ case SymbolicRegexKind.Or:
+ case SymbolicRegexKind.And:
+ Debug.Assert(_alts is not null);
+ is_nullable = _alts.IsNullableFor(context);
+ break;
- case SymbolicRegexKind.StartAnchor:
- is_nullable = CharKind.Prev(context) == CharKind.StartStop;
- break;
+ case SymbolicRegexKind.Not:
+ Debug.Assert(_left is not null);
+ is_nullable = !_left.IsNullableFor(context);
+ break;
- case SymbolicRegexKind.EndAnchor:
- is_nullable = CharKind.Next(context) == CharKind.StartStop;
- break;
+ case SymbolicRegexKind.StartAnchor:
+ is_nullable = CharKind.Prev(context) == CharKind.StartStop;
+ break;
- case SymbolicRegexKind.BOLAnchor:
- // Beg-Of-Line anchor is nullable when the previous character is Newline or Start
- // note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
- is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
- break;
+ case SymbolicRegexKind.EndAnchor:
+ is_nullable = CharKind.Next(context) == CharKind.StartStop;
+ break;
- case SymbolicRegexKind.EOLAnchor:
- // End-Of-Line anchor is nullable when the next character is Newline or Stop
- // note: at least one of the bits must be 1, but both could also be 1 in case of \Z
- is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
- break;
+ case SymbolicRegexKind.BOLAnchor:
+ // Beg-Of-Line anchor is nullable when the previous character is Newline or Start
+ // note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
+ is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
+ break;
- case SymbolicRegexKind.WBAnchor:
- // test that prev char is word letter iff next is not not word letter
- is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
- break;
+ case SymbolicRegexKind.EOLAnchor:
+ // End-Of-Line anchor is nullable when the next character is Newline or Stop
+ // note: at least one of the bits must be 1, but both could also be 1 in case of \Z
+ is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
+ break;
- case SymbolicRegexKind.NWBAnchor:
- // test that prev char is word letter iff next is word letter
- is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
- break;
+ case SymbolicRegexKind.WBAnchor:
+ // test that prev char is word letter iff next is not not word letter
+ is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
+ break;
- case SymbolicRegexKind.EndAnchorZ:
- // \Z anchor is nullable when the next character is either the last Newline or Stop
- // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
- is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
- break;
+ case SymbolicRegexKind.NWBAnchor:
+ // test that prev char is word letter iff next is word letter
+ is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
+ break;
- default: //SymbolicRegexKind.EndAnchorZRev:
- // EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
- // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
- Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
- is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
- break;
- }
+ case SymbolicRegexKind.EndAnchorZ:
+ // \Z anchor is nullable when the next character is either the last Newline or Stop
+ // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
+ is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
+ break;
- _nullabilityCache[context] = is_nullable;
+ default: // SymbolicRegexKind.EndAnchorZRev:
+ // EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
+ // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
+ Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
+ is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
+ break;
}
+ Volatile.Write(ref _nullabilityCache[context], is_nullable ? TrueByte : FalseByte);
+
return is_nullable;
}
diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
index 6dd56fc109e9b3..e7f0b4d2985538 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
@@ -1827,5 +1827,33 @@ public async Task UseRegexConcurrently_ThreadSafe_Success(RegexEngine engine, Ti
}, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default)).ToArray());
}
}
+
+ [Theory]
+ [MemberData(nameof(MatchWordsInAnchoredRegexes_TestData))]
+ public async Task MatchWordsInAnchoredRegexes(RegexEngine engine, RegexOptions options, string pattern, string input, (int, int)[] matches)
+ {
+ // The aim of these test is to test corner cases of matches involving anchors
+ // For NonBacktracking these tests are meant to
+ // cover most contexts in _nullabilityForContext in SymbolicRegexNode
+ Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
+ MatchCollection ms = r.Matches(input);
+ Assert.Equal(matches.Length, ms.Count);
+ for (int i = 0; i < matches.Length; i++)
+ {
+ Assert.Equal(ms[i].Index, matches[i].Item1);
+ Assert.Equal(ms[i].Length, matches[i].Item2);
+ }
+ }
+
+ public static IEnumerable