Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ internal sealed class RegexInterpreter : RegexRunner
private int _codepos;
private bool _rightToLeft;
private bool _caseInsensitive;
private int _maxBacktrackPosition = -1;

public RegexInterpreter(RegexCode code, CultureInfo culture)
{
Expand Down Expand Up @@ -214,6 +215,23 @@ private char Forwardcharnext()
return _caseInsensitive ? _textInfo.ToLower(ch) : ch;
}

/// <summary>
/// After a failed literal match, pulls <c>runtextpos</c> back toward the position recorded in
/// <c>_maxBacktrackPosition</c>, landing on the last occurrence of <paramref name="str"/> at or after
/// that position when one exists.
/// </summary>
/// <param name="str">The literal text that just failed to match at the current position.</param>
/// <remarks>
/// Does nothing when <c>_maxBacktrackPosition</c> is -1 or when <c>runtextpos</c> is not beyond it.
/// </remarks>
private void OptimizeRuntextposBacktracking(string str)
{
// If called after a greedy op such as a .*, we would have zipped runtextpos to the end without really examining any characters. Reset to maxBacktrackPos here as an optimization
if (_maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition)
{
// Default to the recorded position first: if lastIndexOf below is -1, runtextpos stays here,
// i.e. we backtrack to the max extent possible.
runtextpos = _maxBacktrackPosition;
// NOTE(review): the span runs from _maxBacktrackPosition to the end of runtext, not to runtextend — confirm that is intended.
ReadOnlySpan<char> runtextSpan = runtext.AsSpan(_maxBacktrackPosition);
// NOTE(review): this compares raw characters, whereas Forwardcharnext lower-cases when _caseInsensitive is set —
// confirm the helper is only meaningful for case-sensitive, left-to-right matching.
int lastIndexOf = runtextSpan.LastIndexOf(str);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same questions as I had on the previous PR.

if (lastIndexOf > -1)
{
// Found the next position to match. lastIndexOf is relative to the span start, so add
// _maxBacktrackPosition to turn it into an index into runtext, and move runtextpos there.
runtextpos = _maxBacktrackPosition + lastIndexOf;
}
}
}

private bool MatchString(string str)
{
int c = str.Length;
Expand All @@ -223,6 +241,7 @@ private bool MatchString(string str)
{
if (runtextend - runtextpos < c)
{
OptimizeRuntextposBacktracking(str);
return false;
}

Expand Down Expand Up @@ -1026,8 +1045,10 @@ protected override void Go()
continue;

case RegexCode.One:
if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
char chOne = (char)Operand(0);
if (Forwardchars() < 1 || Forwardcharnext() != chOne)
{
OptimizeRuntextposBacktracking(chOne.ToString());
break;
}
advance = 1;
Expand Down Expand Up @@ -1185,6 +1206,7 @@ protected override void Go()
int len = Math.Min(Operand(1), Forwardchars());
char ch = (char)Operand(0);
int i;
int tempMaxBacktrackPosition = runtextpos;

if (!_rightToLeft && !_caseInsensitive)
{
Expand Down Expand Up @@ -1217,6 +1239,8 @@ protected override void Go()
if (len > i && _operator == RegexCode.Notoneloop)
{
TrackPush(len - i - 1, runtextpos - Bump());
Debug.Assert(_maxBacktrackPosition == -1, $"maxBacktrackPosition = {_maxBacktrackPosition}, runtext = {runtext}, runtextpos = {runtextpos}, ch = {ch}, code = {_code}, runregex = {runregex}");
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any assert failures stemming from this line are potential optimizations

_maxBacktrackPosition = tempMaxBacktrackPosition;
}
}
advance = 2;
Expand Down Expand Up @@ -1261,6 +1285,16 @@ protected override void Go()
{
int i = TrackPeek();
int pos = TrackPeek(1);
if (_maxBacktrackPosition != -1 && pos > _maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft)
{
// The Multi node has bumped us along already
int difference = pos - _maxBacktrackPosition;
Debug.Assert(difference > 0);
pos = runtextpos;
i -= difference;
// We shouldn't be backtracking anymore.
_maxBacktrackPosition = -1;
}
runtextpos = pos;
if (i > 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ public static IEnumerable<object[]> Match_Basic_TestData()

// Using *, +, ?, {}: Actual - "a+\\.?b*\\.+c{2}"
yield return new object[] { @"a+\.?b*\.+c{2}", "ab.cc", RegexOptions.None, 0, 5, true, "ab.cc" };
yield return new object[] { @"[^a]+\.[^z]+", "zzzzz", RegexOptions.None, 0, 5, false, string.Empty };

// RightToLeft
yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" };
Expand Down