Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -308,11 +308,11 @@ private void ValidateFinalTreeInvariants()
break;

case Testref:
Debug.Assert(childCount is 1 or 2, $"Expected one or two children for {node.TypeName}, got {childCount}");
Debug.Assert(childCount == 2, $"Expected two children for {node.TypeName}, got {childCount}");
break;

case Testgroup:
Debug.Assert(childCount is 2 or 3, $"Expected two or three children for {node.TypeName}, got {childCount}");
Debug.Assert(childCount == 3, $"Expected three children for {node.TypeName}, got {childCount}");
break;

case Concatenate:
Expand Down Expand Up @@ -562,39 +562,20 @@ public bool IsAtomicByParent()
/// <summary>
/// Removes redundant nodes from the subtree, and returns an optimized subtree.
/// </summary>
internal RegexNode Reduce()
{
switch (Type)
internal RegexNode Reduce() =>
Type switch
{
case Alternate:
return ReduceAlternation();

case Concatenate:
return ReduceConcatenation();

case Loop:
case Lazyloop:
return ReduceLoops();

case Atomic:
return ReduceAtomic();

case Group:
return ReduceGroup();

case Set:
case Setloop:
case Setloopatomic:
case Setlazy:
return ReduceSet();

case Prevent:
return ReducePrevent();

default:
return this;
}
}
Alternate => ReduceAlternation(),
Atomic => ReduceAtomic(),
Concatenate => ReduceConcatenation(),
Group => ReduceGroup(),
Loop or Lazyloop => ReduceLoops(),
Prevent => ReducePrevent(),
Set or Setloop or Setloopatomic or Setlazy => ReduceSet(),
Testgroup => ReduceTestgroup(),
Testref => ReduceTestref(),
_ => this,
};

/// <summary>Remove an unnecessary Concatenation or Alternation node</summary>
/// <remarks>
Expand Down Expand Up @@ -1819,6 +1800,53 @@ private RegexNode ReducePrevent()
return this;
}

/// <summary>Optimizations for backreference conditionals.</summary>
private RegexNode ReduceTestref()
{
Debug.Assert(Type == Testref);
Debug.Assert(ChildCount() is 1 or 2);

// This isn't so much an optimization as it is changing the tree for consistency.
// We want all engines to be able to trust that every Testref will have two children,
// even though it's optional in the syntax. If it's missing a "not matched" branch,
// we add one that will match empty.
if (ChildCount() == 1)
{
AddChild(new RegexNode(Empty, Options));
}

return this;
}

/// <summary>Optimizations for expression conditionals.</summary>
private RegexNode ReduceTestgroup()
{
Debug.Assert(Type == Testgroup);
Debug.Assert(ChildCount() is 2 or 3);

// This isn't so much an optimization as it is changing the tree for consistency.
// We want all engines to be able to trust that every Testgroup will have three children,
// even though it's optional in the syntax. If it's missing a "not matched" branch,
// we add one that will match empty.
if (ChildCount() == 2)
{
AddChild(new RegexNode(Empty, Options));
}

// It's common for the condition to be an explicit positive lookahead, as specifying
// that eliminates any ambiguity in syntax as to whether the expression is to be matched
// as an expression or to be a reference to a capture group. After parsing, however,
// there's no ambiguity, and we can remove an extra level of positive lookahead, as the
// engines need to treat the condition as a zero-width positive, atomic assertion regardless.
RegexNode condition = Child(0);
if (condition.Type == Require && (condition.Options & RegexOptions.RightToLeft) == 0)
{
ReplaceChild(0, condition.Child(0));
}

return this;
}

/// <summary>
/// Determines whether node can be switched to an atomic loop. Subsequent is the node
/// immediately after 'node'.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex)
Emit(RegexCode.Goto, 0);
PatchJump(Branchpos, _emitted.Length);
Emit(RegexCode.Forejump);
if (node.ChildCount() > 1)
{
break;
}

// else fallthrough
goto case 1;
break;
}
case 1:
PatchJump(_intStack.Pop(), _emitted.Length);
Expand Down Expand Up @@ -328,11 +322,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex)
PatchJump(Branchpos, _emitted.Length);
Emit(RegexCode.Getmark);
Emit(RegexCode.Forejump);

if (node.ChildCount() > 2)
break;
// else fallthrough
goto case 2;
break;
case 2:
PatchJump(_intStack.Pop(), _emitted.Length);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ SymbolicRegexNode<BDD> ConvertSetloop(RegexNode node, bool isLazy)

bool IsDotStar(RegexNode node) => node.Type == RegexNode.Setloop && Convert(node, topLevel: false).IsAnyStar;

bool IsIntersect(RegexNode node) => node.Type == RegexNode.Testgroup && node.ChildCount() > 2 && IsNothing(node.Child(2));
bool IsIntersect(RegexNode node) => node.Type == RegexNode.Testgroup && IsNothing(node.Child(2));

bool TryGetIntersection(RegexNode node, [Diagnostics.CodeAnalysis.NotNullWhen(true)] out List<RegexNode>? conjuncts)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ public static IEnumerable<object[]> Groups_Basic_TestData()
yield return new object[] { engine, null, @"abcd(.*)f", @"abcabcdefg", options, new string[] { "abcdef", "e" } };
}

// Grouping Constructs Invalid Regular Expressions
// Grouping Constructs
yield return new object[] { engine, null, @"()", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
yield return new object[] { engine, null, @"(?<cat>)", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
yield return new object[] { engine, null, @"(?'cat')", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
Expand All @@ -844,13 +844,15 @@ public static IEnumerable<object[]> Groups_Basic_TestData()
yield return new object[] { engine, null, @"(?<=)", "cat", RegexOptions.None, new string[] { string.Empty } };
yield return new object[] { engine, null, @"(?>)", "cat", RegexOptions.None, new string[] { string.Empty } };

// Alternation construct Invalid Regular Expressions
// Alternation construct
yield return new object[] { engine, null, @"(?()|)", "(?()|)", RegexOptions.None, new string[] { "" } };

yield return new object[] { engine, null, @"(?(cat)|)", "cat", RegexOptions.None, new string[] { "" } };
yield return new object[] { engine, null, @"(?(cat)|)", "dog", RegexOptions.None, new string[] { "" } };

yield return new object[] { engine, null, @"(?(cat)catdog|)", "catdog", RegexOptions.None, new string[] { "catdog" } };
yield return new object[] { engine, null, @"(?(cat)cat\w\w\w)*", "catdogcathog", RegexOptions.None, new string[] { "catdogcathog" } };
yield return new object[] { engine, null, @"(?(?=cat)cat\w\w\w)*", "catdogcathog", RegexOptions.None, new string[] { "catdogcathog" } };
yield return new object[] { engine, null, @"(?(cat)catdog|)", "dog", RegexOptions.None, new string[] { "" } };
yield return new object[] { engine, null, @"(?(cat)dog|)", "dog", RegexOptions.None, new string[] { "" } };
yield return new object[] { engine, null, @"(?(cat)dog|)", "cat", RegexOptions.None, new string[] { "" } };
Expand All @@ -859,6 +861,9 @@ public static IEnumerable<object[]> Groups_Basic_TestData()
yield return new object[] { engine, null, @"(?(cat)|catdog)", "catdog", RegexOptions.None, new string[] { "" } };
yield return new object[] { engine, null, @"(?(cat)|dog)", "dog", RegexOptions.None, new string[] { "dog" } };

yield return new object[] { engine, null, @"(?((\w{3}))\1\1|no)", "dogdogdog", RegexOptions.None, new string[] { "dogdog", "dog" } };
yield return new object[] { engine, null, @"(?((\w{3}))\1\1|no)", "no", RegexOptions.None, new string[] { "no", "" } };

// Invalid unicode
yield return new object[] { engine, null, "([\u0000-\uFFFF-[azAZ09]]|[\u0000-\uFFFF-[^azAZ09]])+", "azAZBCDE1234567890BCDEFAZza", RegexOptions.None, new string[] { "azAZBCDE1234567890BCDEFAZza", "a" } };
yield return new object[] { engine, null, "[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[a]]]]]]+", "abcxyzABCXYZ123890", RegexOptions.None, new string[] { "bcxyzABCXYZ123890" } };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,17 @@ public static IEnumerable<object[]> Match_MemberData()
yield return ("(?(dog2))", "dog2", RegexOptions.None, 0, 4, true, string.Empty);
yield return ("(?(a:b))", "a", RegexOptions.None, 0, 1, true, string.Empty);
yield return ("(?(a:))", "a", RegexOptions.None, 0, 1, true, string.Empty);
yield return ("(?(cat)cat|dog)", "cat", RegexOptions.None, 0, 3, true, "cat");
yield return ("(?((?=cat))cat|dog)", "cat", RegexOptions.None, 0, 3, true, "cat");
yield return ("(?(cat)|dog)", "cat", RegexOptions.None, 0, 3, true, string.Empty);
yield return ("(?(cat)|dog)", "catdog", RegexOptions.None, 0, 6, true, string.Empty);
yield return ("(?(cat)|dog)", "oof", RegexOptions.None, 0, 3, false, string.Empty);
yield return ("(?(cat)dog1|dog2)", "catdog1", RegexOptions.None, 0, 7, false, string.Empty);
yield return ("(?(cat)dog1|dog2)", "catdog2", RegexOptions.None, 0, 7, true, "dog2");
yield return ("(?(cat)dog1|dog2)", "catdog1dog2", RegexOptions.None, 0, 11, true, "dog2");
yield return (@"(?(\w+)\w+)dog", "catdog", RegexOptions.None, 0, 6, true, "catdog");
yield return (@"(?(abc)\w+|\w{0,2})dog", "catdog", RegexOptions.None, 0, 6, true, "atdog");
yield return (@"(?(abc)cat|\w{0,2})dog", "catdog", RegexOptions.None, 0, 6, true, "atdog");
yield return (@"(\w+|\d+)a+[ab]+", "123123aa", RegexOptions.None, 0, 8, true, "123123aa");
yield return ("(a|ab|abc|abcd)d", "abcd", RegexOptions.RightToLeft, 0, 4, true, "abcd");
yield return ("(?>(?:a|ab|abc|abcd))d", "abcd", RegexOptions.None, 0, 4, false, string.Empty);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,32 @@ public static IEnumerable<object[]> Matches_TestData()
}
};

yield return new object[]
{
engine,
@"(?(\w+)\w+|)", "abcd", RegexOptions.None,
new CaptureData[]
{
new CaptureData("abcd", 0, 4),
new CaptureData("", 4, 0),
}
};

if (!PlatformDetection.IsNetFramework)
{
// .NET Framework has some behavioral inconsistencies when there's no else branch.
yield return new object[]
{
engine,
@"(?(\w+)\w+)", "abcd", RegexOptions.None,
new CaptureData[]
{
new CaptureData("abcd", 0, 4),
new CaptureData("", 4, 0),
}
};
}

yield return new object[]
{
engine,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,11 @@ private static int GetMinRequiredLength(Regex r)
[InlineData("xyz(?:(?i:abcde)|(?i:abcdf))", "xyz(?i:abcd[ef])")]
[InlineData("bonjour|hej|ciao|shalom|zdravo|pozdrav|hallo|hola|hello|hey|witam|tere|bonjou|salam|helo|sawubona", "(?>bonjou(?>r|)|h(?>e(?>j|(?>l(?>lo|o)|y))|allo|ola)|ciao|s(?>halom|a(?>lam|wubona))|zdravo|pozdrav|witam|tere)")]
[InlineData("\\w\\d123|\\w\\dabc", "\\w\\d(?:123|abc)")]
[InlineData("(a)(?(1)b)", "(a)(?(1)b|)")]
[InlineData("(abc)(?(1)def)", "(abc)(?(1)def|)")]
[InlineData("(?(a)a)", "(?(a)a|)")]
[InlineData("(?(abc)def)", "(?(abc)def|)")]
[InlineData("(?(\\w)\\d)", "(?(\\w)\\d|)")]
// Auto-atomicity
[InlineData("a*b", "(?>a*)b")]
[InlineData("a*b+", "(?>a*)b+")]
Expand Down