Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -253,23 +253,40 @@ public static ImmutableArray<CSharpDirective> EvaluateDirectives(
}
}

internal readonly record struct SourceFile(string Path, SourceText Text)
internal readonly record struct SourceFile(string Path, SourceText Text, bool HasUtf8Bom = false)
{
/// <summary>
/// Opens the file at <paramref name="filePath"/> for reading and builds a <see cref="SourceFile"/> from its contents.
/// </summary>
/// <param name="filePath">Path handed to <c>File.OpenRead</c> and stored as the first component of the result.</param>
/// <returns>A <see cref="SourceFile"/> whose text is produced by <c>SourceText.From(stream, Encoding.UTF8)</c>.</returns>
// NOTE(review): this listing appears to come from a diff view with removed and added lines
// interleaved; the first return below looks like the pre-change line and the three lines after
// it like its replacement — confirm against the actual file before relying on statement order.
public static SourceFile Load(string filePath)
{
// The stream is disposed when the method returns (using declaration).
using var stream = File.OpenRead(filePath);
return new SourceFile(filePath, SourceText.From(stream, Encoding.UTF8));
// Check the leading bytes for a UTF-8 BOM before the text is read.
bool hasUtf8Bom = DetectUtf8Bom(stream);
stream.Position = 0; // Reset stream position after BOM detection
// Pass the detection result along as the third constructor argument.
return new SourceFile(filePath, SourceText.From(stream, Encoding.UTF8), hasUtf8Bom);
}

/// <summary>
/// Reports whether the next three bytes read from <paramref name="stream"/> are the
/// UTF-8 byte order mark (0xEF 0xBB 0xBF).
/// </summary>
/// <param name="stream">
/// Stream to inspect. Its <see cref="Stream.Length"/> is queried, and up to three bytes are
/// read starting at its current position.
/// </param>
/// <returns>
/// <see langword="true"/> when three bytes were read and they equal 0xEF 0xBB 0xBF;
/// otherwise <see langword="false"/>.
/// </returns>
/// <remarks>
/// The stream position is left wherever reading stopped; this method does not rewind it.
/// </remarks>
private static bool DetectUtf8Bom(Stream stream)
{
    // UTF-8 BOM is 0xEF 0xBB 0xBF
    const int BomLength = 3;

    // A stream shorter than the BOM cannot contain it; nothing is read in that case.
    if (stream.Length < BomLength)
    {
        return false;
    }

    byte[] buffer = new byte[BomLength];

    // Stream.Read may hand back fewer bytes than requested even when more data follows,
    // so keep reading until the buffer is full or the stream signals end-of-data (0).
    int totalRead = 0;
    while (totalRead < BomLength)
    {
        int read = stream.Read(buffer, totalRead, BomLength - totalRead);
        if (read == 0)
        {
            break;
        }

        totalRead += read;
    }

    return totalRead == BomLength && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF;
}

/// <summary>
/// Creates a <see cref="SourceFile"/> that keeps this instance's <c>Path</c> but carries <paramref name="newText"/>.
/// </summary>
/// <param name="newText">Text for the returned instance.</param>
/// <returns>A new <see cref="SourceFile"/>; this instance is not modified.</returns>
// NOTE(review): this listing appears to interleave the removed and added lines of a diff; the
// first return looks like the pre-change line and the second, which also forwards HasUtf8Bom,
// like its replacement — confirm against the actual file.
public SourceFile WithText(SourceText newText)
{
return new SourceFile(Path, newText);
return new SourceFile(Path, newText, HasUtf8Bom);
}

/// <summary>
/// Writes <c>Text</c> to the file at <c>Path</c>, opening it with <c>FileMode.Create</c> and write access.
/// </summary>
// NOTE(review): this listing appears to interleave the removed and added lines of a diff; the
// first writer declaration (plain Encoding.UTF8) looks like the pre-change line and the
// UTF8Encoding-based one like its replacement — confirm against the actual file.
public void Save()
{
using var stream = File.Open(Path, FileMode.Create, FileAccess.Write);
using var writer = new StreamWriter(stream, Encoding.UTF8);
// Use UTF8Encoding with encoderShouldEmitUTF8Identifier set to match the original file
var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: HasUtf8Bom);
using var writer = new StreamWriter(stream, encoding);
// Emit the text through the writer; writer and stream are disposed on return (using declarations).
Text.Write(writer);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,18 @@ Microsoft.DotNet.FileBasedPrograms.SimpleDiagnostic.Position.TextSpan.init -> vo
Microsoft.DotNet.FileBasedPrograms.SimpleDiagnostic.SimpleDiagnostic() -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile
Microsoft.DotNet.FileBasedPrograms.SourceFile.Deconstruct(out string! Path, out Microsoft.CodeAnalysis.Text.SourceText! Text) -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.Deconstruct(out string! Path, out Microsoft.CodeAnalysis.Text.SourceText! Text, out bool HasUtf8Bom) -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.Equals(Microsoft.DotNet.FileBasedPrograms.SourceFile other) -> bool
Microsoft.DotNet.FileBasedPrograms.SourceFile.GetFileLinePositionSpan(Microsoft.CodeAnalysis.Text.TextSpan span) -> Microsoft.CodeAnalysis.FileLinePositionSpan
Microsoft.DotNet.FileBasedPrograms.SourceFile.GetLocationString(Microsoft.CodeAnalysis.Text.TextSpan span) -> string!
Microsoft.DotNet.FileBasedPrograms.SourceFile.HasUtf8Bom.get -> bool
Microsoft.DotNet.FileBasedPrograms.SourceFile.HasUtf8Bom.init -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.Path.get -> string!
Microsoft.DotNet.FileBasedPrograms.SourceFile.Path.init -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.Save() -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.SourceFile() -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.SourceFile(string! Path, Microsoft.CodeAnalysis.Text.SourceText! Text) -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.SourceFile(string! Path, Microsoft.CodeAnalysis.Text.SourceText! Text, bool HasUtf8Bom = false) -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.Text.get -> Microsoft.CodeAnalysis.Text.SourceText!
Microsoft.DotNet.FileBasedPrograms.SourceFile.Text.init -> void
Microsoft.DotNet.FileBasedPrograms.SourceFile.WithText(Microsoft.CodeAnalysis.Text.SourceText! newText) -> Microsoft.DotNet.FileBasedPrograms.SourceFile
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,77 @@ public void RemoveMultiple()
"""));
}

/// <summary>
/// Round-trips a BOM-less UTF-8 file through the editor and checks that saving does not
/// prepend a UTF-8 BOM. This matters for shebang (#!) scripts on Unix-like systems.
/// </summary>
[Fact]
public void PreservesNoBomEncoding()
{
    string path = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.cs");
    try
    {
        // Arrange: a shebang script written as UTF-8 without a byte order mark.
        var utf8WithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
        File.WriteAllText(path, "#!/usr/bin/env dotnet run\nConsole.WriteLine();", utf8WithoutBom);

        // Act: load the file, add one package directive, and write it back.
        var editor = FileBasedAppSourceEditor.Load(SourceFile.Load(path));
        editor.Add(new CSharpDirective.Package(default) { Name = "MyPackage", Version = "1.0.0" });
        editor.SourceFile.Save();

        // Assert: the saved bytes must not start with EF BB BF.
        byte[] savedBytes = File.ReadAllBytes(path);
        bool startsWithBom = savedBytes is [0xEF, 0xBB, 0xBF, ..];
        Assert.False(startsWithBom, "File should not have UTF-8 BOM");

        // Assert: the shebang line is still the very first thing in the file.
        Assert.StartsWith("#!/usr/bin/env dotnet run", File.ReadAllText(path));
    }
    finally
    {
        // Clean up the temp file if it was created.
        if (File.Exists(path))
        {
            File.Delete(path);
        }
    }
}

/// <summary>
/// Round-trips a UTF-8 file that starts with a BOM through the editor and checks that
/// the BOM is still there after saving.
/// </summary>
[Fact]
public void PreservesBomEncoding()
{
    string path = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.cs");
    try
    {
        // Arrange: a source file written as UTF-8 with a byte order mark.
        var utf8WithBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);
        File.WriteAllText(path, "Console.WriteLine();", utf8WithBom);

        // Act: load the file, add one package directive, and write it back.
        var editor = FileBasedAppSourceEditor.Load(SourceFile.Load(path));
        editor.Add(new CSharpDirective.Package(default) { Name = "MyPackage", Version = "1.0.0" });
        editor.SourceFile.Save();

        // Assert: the saved bytes still start with EF BB BF.
        byte[] savedBytes = File.ReadAllBytes(path);
        bool startsWithBom = savedBytes is [0xEF, 0xBB, 0xBF, ..];
        Assert.True(startsWithBom, "File should have UTF-8 BOM");
    }
    finally
    {
        // Clean up the temp file if it was created.
        if (File.Exists(path))
        {
            File.Delete(path);
        }
    }
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to verify the behavior when the original file uses an encoding other than UTF-8.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 643569f. The new PreservesNonUtf8Encoding test verifies that UTF-16 LE encoding (with BOM 0xFF 0xFE) is preserved correctly. SourceText's auto-detection handles various encodings, not just UTF-8.


private void Verify(
string input,
params ReadOnlySpan<(Action<FileBasedAppSourceEditor> action, string expectedOutput)> verify)
Expand Down
Loading