Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Fix non-determinism in Regex source generator
The source generator enumerates a Hashtable to write out its contents.  When the keys of the Hashtable are strings, string hash code randomization may result in the order of that enumeration being different in different processes, leading to non-deterministic ordering of values written out and thus non-deterministic source generator output.
  • Loading branch information
stephentoub committed Nov 9, 2022
commit 0358f3cc509f2f1c0eba51cd13023d278978e1aa
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,13 @@ private static void EmitRegexDerivedImplementation(
if (rm.Tree.CaptureNumberSparseMapping is not null)
{
writer.Write(" base.Caps = new Hashtable {");
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping.Cast<DictionaryEntry>());
writer.WriteLine($" }};");
}
if (rm.Tree.CaptureNameToNumberMapping is not null)
{
writer.Write(" base.CapNames = new Hashtable {");
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping.Cast<DictionaryEntry>().OrderBy(de => de.Key as string, StringComparer.Ordinal));
writer.WriteLine($" }};");
}
if (rm.Tree.CaptureNames is not null)
Expand All @@ -149,11 +149,10 @@ private static void EmitRegexDerivedImplementation(
writer.WriteLine(runnerFactoryImplementation);
writer.WriteLine($"}}");

static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
static void AppendHashtableContents(IndentedTextWriter writer, IEnumerable<DictionaryEntry> contents)
{
IDictionaryEnumerator en = ht.GetEnumerator();
string separator = "";
while (en.MoveNext())
foreach (DictionaryEntry en in contents)
{
writer.Write(separator);
separator = ", ";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ internal static byte[] CreateAssemblyImage(string source, string assemblyName)
throw new InvalidOperationException();
}

internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
private static async Task<(Compilation, GeneratorDriverRunResult)> RunGeneratorCore(
string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
{
var proj = new AdhocWorkspace()
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
Expand All @@ -87,7 +87,13 @@ internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
var generator = new RegexGenerator();
CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion));
GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken);
GeneratorDriverRunResult generatorResults = gd.GetRunResult();
return (comp, gd.GetRunResult());
}

internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
{
(Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
if (!compile)
{
return generatorResults.Diagnostics;
Expand All @@ -107,6 +113,20 @@ internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray();
}

/// <summary>
/// Runs the regex source generator over <paramref name="code"/> and returns the text of every
/// generated syntax tree concatenated into a single string.
/// </summary>
/// <param name="code">The C# source to feed to the generator.</param>
/// <param name="langVersion">Forwarded unchanged to <c>RunGeneratorCore</c>.</param>
/// <param name="additionalRefs">Forwarded unchanged to <c>RunGeneratorCore</c>.</param>
/// <param name="allowUnsafe">Forwarded unchanged to <c>RunGeneratorCore</c>.</param>
/// <param name="cancellationToken">Forwarded unchanged to <c>RunGeneratorCore</c>.</param>
/// <returns>The concatenated text of all generated trees.</returns>
/// <exception cref="ArgumentException">
/// The generator reported at least one diagnostic; the message lists the diagnostics (one per line)
/// followed by the generated source.
/// </exception>
internal static async Task<string> GenerateSourceText(
    string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
{
    // Only the generator run result is needed here; the compilation is discarded.
    (_, GeneratorDriverRunResult runResult) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);

    var treeTexts = runResult.GeneratedTrees.Select(tree => tree.ToString());
    string generatedSource = string.Concat(treeTexts);

    if (runResult.Diagnostics.Length == 0)
    {
        return generatedSource;
    }

    // Surface the diagnostics together with whatever source was produced to make failures debuggable.
    throw new ArgumentException($"{string.Join(Environment.NewLine, runResult.Diagnostics)}{Environment.NewLine}{generatedSource}");
}

internal static async Task<Regex> SourceGenRegexAsync(
string pattern, CultureInfo? culture, RegexOptions? options = null, TimeSpan? matchTimeout = null, CancellationToken cancellationToken = default)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.DotNet.RemoteExecutor;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Threading.Tasks;
using Xunit;
Expand Down Expand Up @@ -839,5 +841,36 @@ partial class C
public static partial Regex Valid();
}", compile: true));
}

[ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
[OuterLoop("Takes several seconds")]
public void Deterministic_SameRegexProducesSameSource()
{
    // The first run is the baseline; each later run happens in a fresh process and must match it exactly.
    string baseline = Generate();

    int attempt = 0;
    while (attempt < 3)
    {
        Assert.Equal(baseline, Generate());
        attempt++;
    }

    // Runs the generator in a separate process and returns everything that process wrote to stdout.
    static string Generate()
    {
        const string Code =
@"using System.Text.RegularExpressions;
partial class C
{
[GeneratedRegex(""(?<Name>\w+) (?<Street>\w+), (?<City>\w+) (?<State>[A-Z]{2}) (?<Zip>[0-9]{5})"")]
public static partial Regex Valid();
}";

        // A new process is used for every generation so that per-process randomization
        // (e.g. string hash code randomization) differs from one run to the next.
        var invokeOptions = new RemoteInvokeOptions
        {
            StartInfo = new ProcessStartInfo { RedirectStandardOutput = true },
        };

        using RemoteInvokeHandle handle = RemoteExecutor.Invoke(
            async () => Console.WriteLine(await RegexGeneratorHelper.GenerateSourceText(Code)),
            invokeOptions);

        // Read the output before the handle is disposed at the end of this scope.
        string generated = handle.Process.StandardOutput.ReadToEnd();
        return generated;
    }
}
}
}