-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Eliminate backtracking in the interpreter for patterns with .* #51508
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2511bfa
781541c
1c7faa0
7ea6425
9c234d4
85b6897
7a404e9
1949445
1564d7a
7f76e5f
ac838ad
262d5d5
aacb00e
4f6bdd4
d8e73cc
cb3f3f2
3ba7f45
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,6 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| using System.Collections.Generic; | ||
| using System.Diagnostics; | ||
| using System.Diagnostics.CodeAnalysis; | ||
| using System.Globalization; | ||
|
|
@@ -21,6 +20,7 @@ internal sealed class RegexInterpreter : RegexRunner | |
| private int _codepos; | ||
| private bool _rightToLeft; | ||
| private bool _caseInsensitive; | ||
| private int _maxBacktrackPosition = -1; | ||
|
|
||
| public RegexInterpreter(RegexCode code, CultureInfo culture) | ||
| { | ||
|
|
@@ -224,6 +224,20 @@ private bool MatchString(string str) | |
| { | ||
| if (runtextend - runtextpos < c) | ||
| { | ||
| // If MatchString was called after a greedy op such as a .*, we would have zipped runtextpos to the end without really examining any characters. Reset to maxBacktrackPos here as an optimization | ||
| if (_maxBacktrackPosition != -1 && runtextpos > _maxBacktrackPosition) | ||
pgovind marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| { | ||
| // If lastIndexOf is -1, we backtrack to the max extent possible. | ||
| runtextpos = _maxBacktrackPosition; | ||
| ReadOnlySpan<char> runtextSpan = runtext.AsSpan(_maxBacktrackPosition); | ||
|
||
| int lastIndexOf = runtextSpan.LastIndexOf(str); | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (lastIndexOf > -1) | ||
| { | ||
| // Found the next position to match. Move runtextpos here | ||
| runtextpos = _maxBacktrackPosition + lastIndexOf; | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
|
|
@@ -1059,17 +1073,7 @@ protected override void Go() | |
| continue; | ||
|
|
||
| case RegexCode.Multi: | ||
| int stringTableIndex = Operand(0); | ||
| char textChar = runtext![runtextpos]; | ||
| if (_code.FirstLetterToStringTableIndices.TryGetValue(textChar, out HashSet<int>? stringTableIndices)) | ||
| { | ||
| if (!stringTableIndices.Contains(stringTableIndex)) | ||
| { | ||
| // We are trying a pattern that doesn't start with the right char, so there's no way we can match. | ||
| break; | ||
| } | ||
| } | ||
| if (!MatchString(_code.Strings[stringTableIndex])) | ||
| if (!MatchString(_code.Strings[Operand(0)])) | ||
| { | ||
| break; | ||
| } | ||
|
|
@@ -1196,6 +1200,7 @@ protected override void Go() | |
| int len = Math.Min(Operand(1), Forwardchars()); | ||
| char ch = (char)Operand(0); | ||
| int i; | ||
| int tempMaxBacktrackPosition = runtextpos; | ||
|
|
||
| if (!_rightToLeft && !_caseInsensitive) | ||
| { | ||
|
|
@@ -1228,6 +1233,8 @@ protected override void Go() | |
| if (len > i && _operator == RegexCode.Notoneloop) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder whether this should also happen for |
||
| { | ||
| TrackPush(len - i - 1, runtextpos - Bump()); | ||
| Debug.Assert(_maxBacktrackPosition == -1); | ||
| _maxBacktrackPosition = tempMaxBacktrackPosition; | ||
stephentoub marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| advance = 2; | ||
|
|
@@ -1272,6 +1279,16 @@ protected override void Go() | |
| { | ||
| int i = TrackPeek(); | ||
| int pos = TrackPeek(1); | ||
| if (_maxBacktrackPosition != -1 && pos > _maxBacktrackPosition && runtextpos < pos && _operator == (RegexCode.Notoneloop | RegexCode.Back) && !_rightToLeft) | ||
| { | ||
| // The Multi node has bumped us along already | ||
| int difference = pos - _maxBacktrackPosition; | ||
| Debug.Assert(difference > 0); | ||
| pos = runtextpos; | ||
| i -= difference; | ||
| // We shouldn't be backtracking anymore. | ||
| _maxBacktrackPosition = -1; | ||
| } | ||
| runtextpos = pos; | ||
| if (i > 0) | ||
| { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.