diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
index 4cc3dc528605b9..cfe5e9859ed61e 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
@@ -20,6 +20,7 @@ internal sealed class RegexInterpreter : RegexRunner
         private int _codepos;
         private bool _rightToLeft;
         private bool _caseInsensitive;
+        private int _maxBacktrackPosition = -1;
 
         public RegexInterpreter(RegexCode code, CultureInfo culture)
         {
@@ -214,6 +215,30 @@ private char Forwardcharnext()
             return _caseInsensitive ? _textInfo.ToLower(ch) : ch;
         }
 
+        /// <summary>
+        /// Called when a literal fails to match: pulls runtextpos back to the last occurrence of
+        /// <paramref name="str"/> at or after _maxBacktrackPosition, or to _maxBacktrackPosition if there is none.
+        /// </summary>
+        /// <param name="str">The literal that just failed to match.</param>
+        private void OptimizeRuntextposBacktracking(string str)
+        {
+            // If called after a greedy op such as a .*, we would have zipped runtextpos to the end without really examining any characters. Reset to maxBacktrackPos here as an optimization.
+            // Skipped when case-insensitive: Forwardcharnext lower-cases the text it compares, but the LastIndexOf below is an ordinal search of the raw text.
+            if (!_caseInsensitive && _maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition)
+            {
+                // If lastIndexOf is -1, we backtrack to the max extent possible.
+                runtextpos = _maxBacktrackPosition;
+                // Search only up to runtextend; text beyond it is outside the range being matched.
+                ReadOnlySpan<char> runtextSpan = runtext.AsSpan(_maxBacktrackPosition, runtextend - _maxBacktrackPosition);
+                int lastIndexOf = runtextSpan.LastIndexOf(str);
+                if (lastIndexOf > -1)
+                {
+                    // Found the next position to match. Move runtextpos here
+                    runtextpos = _maxBacktrackPosition + lastIndexOf;
+                }
+            }
+        }
+
         private bool MatchString(string str)
         {
             int c = str.Length;
@@ -223,6 +248,7 @@ private bool MatchString(string str)
             {
                 if (runtextend - runtextpos < c)
                 {
+                    OptimizeRuntextposBacktracking(str);
                     return false;
                 }
 
@@ -1026,8 +1052,10 @@ protected override void Go()
                         continue;
 
                     case RegexCode.One:
-                        if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
+                        char chOne = (char)Operand(0);
+                        if (Forwardchars() < 1 || Forwardcharnext() != chOne)
                         {
+                            OptimizeRuntextposBacktracking(chOne.ToString());
                             break;
                         }
                         advance = 1;
@@ -1185,6 +1213,7 @@ protected override void Go()
                             int len = Math.Min(Operand(1), Forwardchars());
                             char ch = (char)Operand(0);
                             int i;
+                            int tempMaxBacktrackPosition = runtextpos;
 
                             if (!_rightToLeft && !_caseInsensitive)
                             {
@@ -1217,6 +1246,8 @@ protected override void Go()
                             if (len > i && _operator == RegexCode.Notoneloop)
                             {
                                 TrackPush(len - i - 1, runtextpos - Bump());
+                                Debug.Assert(_maxBacktrackPosition == -1, $"maxBacktrackPosition = {_maxBacktrackPosition}, runtext = {runtext}, runtextpos = {runtextpos}, ch = {ch}, code = {_code}, runregex = {runregex}");
+                                _maxBacktrackPosition = tempMaxBacktrackPosition;
                             }
                         }
                         advance = 2;
@@ -1261,6 +1292,16 @@ protected override void Go()
                         {
                             int i = TrackPeek();
                             int pos = TrackPeek(1);
+                            if (_maxBacktrackPosition != -1 && pos > _maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft)
+                            {
+                                // The Multi node has bumped us along already
+                                int difference = pos - _maxBacktrackPosition;
+                                Debug.Assert(difference > 0);
+                                pos = runtextpos;
+                                i -= difference;
+                                // We shouldn't be backtracking anymore.
+                                _maxBacktrackPosition = -1;
+                            }
                             runtextpos = pos;
                             if (i > 0)
                             {
diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
index d2cd6b4e3e1a70..f9eaa966caf543 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
@@ -204,6 +204,7 @@ public static IEnumerable<object[]> Match_Basic_TestData()
 
             // Using *, +, ?, {}: Actual - "a+\\.?b*\\.?c{2}"
             yield return new object[] { @"a+\.?b*\.+c{2}", "ab.cc", RegexOptions.None, 0, 5, true, "ab.cc" };
+            yield return new object[] { @"[^a]+\.[^z]+", "zzzzz", RegexOptions.None, 0, 5, false, string.Empty };
 
             // RightToLeft
             yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" };