Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,8 @@ private void ReadVersionAttribute(Span<byte> buffer)
private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
{
// Convert the byte arrays
_uName = TarHelpers.GetTrimmedAsciiString(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
_gName = TarHelpers.GetTrimmedAsciiString(buffer.Slice(FieldLocations.GName, FieldLengths.GName));
_uName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
_gName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName));

// DevMajor and DevMinor only have values with character devices and block devices.
// For all other typeflags, the values in these fields are irrelevant.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,28 +358,41 @@ private void WriteAsPaxSharedInternal(Span<byte> buffer, out long actualLength)
_checksum = WriteChecksum(tmpChecksum, buffer);
}

// All formats save in the name byte array only the ASCII bytes that fit.
// All formats save in the name byte array only the UTF8 bytes that fit.
private int WriteName(Span<byte> buffer)
{
ReadOnlySpan<char> src = _name.AsSpan(0, Math.Min(_name.Length, FieldLengths.Name));
Span<byte> dest = buffer.Slice(FieldLocations.Name, FieldLengths.Name);
int encoded = Encoding.ASCII.GetBytes(src, dest);
return Checksum(dest.Slice(0, encoded));
ReadOnlySpan<char> name = _name;
int utf16NameTruncatedLength = GetUtf16TruncatedTextLength(name, FieldLengths.Name);

ReadOnlySpan<char> truncatedName = name.Slice(0, utf16NameTruncatedLength);
Span<byte> destination = buffer.Slice(FieldLocations.Name, FieldLengths.Name);
Encoding.UTF8.GetBytes(truncatedName, destination);

return Checksum(destination);
}

// Ustar and PAX save in the name byte array only the ASCII bytes that fit, and the rest of that string is saved in the prefix field.
// Ustar and PAX save in the name byte array only the UTF8 bytes that fit, and the rest of that string is saved in the prefix field.
private int WritePosixName(Span<byte> buffer)
{
int checksum = WriteName(buffer);
ReadOnlySpan<char> name = _name;
int utf16NameTruncatedLength = GetUtf16TruncatedTextLength(name, FieldLengths.Name);

if (_name.Length > FieldLengths.Name)
ReadOnlySpan<char> truncatedName = name.Slice(0, utf16NameTruncatedLength);
Span<byte> destination = buffer.Slice(FieldLocations.Name, FieldLengths.Name);
Encoding.UTF8.GetBytes(truncatedName, destination);

int checksum = Checksum(destination);

if (utf16NameTruncatedLength < name.Length)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the expected behavior if truncatedName as UTF8 only takes up, say, 98 of the 100 bytes in the Name field and then we spill over to Prefix for the rest? How do tools know the extra two bytes in Name aren't actually part of the name?

Copy link
Member Author

@jozkee jozkee Sep 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the thing is, the prefix logic is wrong, I filed #75360 for it. I don't want to fix it here because of time constraint.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me from that issue: is the problem with how we read/interpret name and prefix, or is the issue with how we write out the name and prefix, or both?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the issue is how we write it.
We currently do this:

Ustar and PAX save in the name byte array only the bytes that fit, and the rest of that string is saved in the prefix field.

Which is actually a suffix.

Instead we should do this:

If the pathname is too long to fit in the 100 bytes provided by the standard format, it can be split at any / character with the first portion going into the prefix field. If the prefix field is not empty, the reader will prepend the prefix value and a / character to the regular name field to obtain the full pathname. The standard does not require a trailing / character on directory names, though most implementations still include this for compatibility reasons.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then if the code that's there is 100% wrong and the code in the PR it's being updated to is 100% wrong, why are we changing that code? Seems like we should either fix it in this PR, or as part of changing it in this PR, this should fail in a more reliable way rather than outputting corrupt data. Do we plan to fix this issue for 7.0 as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, we should fix it for 7.0 as well. Please see #75902 (comment).

{
int prefixBytesLength = Math.Min(_name.Length - FieldLengths.Name, FieldLengths.Prefix);
Span<byte> remaining = stackalloc byte[prefixBytesLength];
int encoded = Encoding.ASCII.GetBytes(_name.AsSpan(FieldLengths.Name, prefixBytesLength), remaining);
Debug.Assert(encoded == remaining.Length);
ReadOnlySpan<char> prefix = name.Slice(utf16NameTruncatedLength);
int utf16PrefixTruncatedLength = GetUtf16TruncatedTextLength(prefix, FieldLengths.Prefix);

destination = buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix);
ReadOnlySpan<char> truncatedPrefix = prefix.Slice(0, utf16PrefixTruncatedLength);
Encoding.UTF8.GetBytes(truncatedPrefix, destination);

checksum += WriteLeftAlignedBytesAndGetChecksum(remaining, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
checksum += Checksum(destination);
}

return checksum;
Expand Down Expand Up @@ -423,7 +436,7 @@ private int WriteCommonFields(Span<byte> buffer, long actualLength, TarEntryType

if (!string.IsNullOrEmpty(_linkName))
{
checksum += WriteAsAsciiString(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName));
checksum += WriteAsUtf8String(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName));
}

return checksum;
Expand Down Expand Up @@ -467,12 +480,12 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)

if (!string.IsNullOrEmpty(_uName))
{
checksum += WriteAsAsciiString(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName));
checksum += WriteAsUtf8String(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName));
}

if (!string.IsNullOrEmpty(_gName))
{
checksum += WriteAsAsciiString(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName));
checksum += WriteAsUtf8String(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName));
}

if (_devMajor > 0)
Expand Down Expand Up @@ -766,10 +779,10 @@ private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destina
return FormatOctal(unixTimeSeconds, destination);
}

// Writes the specified text as an ASCII string aligned to the left, and returns its checksum.
private static int WriteAsAsciiString(string str, Span<byte> buffer)
// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
private static int WriteAsUtf8String(string str, Span<byte> buffer)
{
byte[] bytes = Encoding.ASCII.GetBytes(str);
byte[] bytes = Encoding.UTF8.GetBytes(str);
return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer);
}

Expand Down Expand Up @@ -819,5 +832,27 @@ private static string GenerateGlobalExtendedAttributeName(int globalExtendedAttr

return result;
}

// Returns the text's utf16 length truncated at the specified max length.
private static int GetUtf16TruncatedTextLength(ReadOnlySpan<char> text, int maxLength)
{
int utf8Length = 0;
int utf16TruncatedLength = 0;

foreach (Rune rune in text.EnumerateRunes())
{
utf8Length += rune.Utf8SequenceLength;
if (utf8Length <= maxLength)
{
utf16TruncatedLength += rune.Utf16SequenceLength;
}
else
{
break;
}
}

return utf16TruncatedLength;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<Compile Include="TarTestsBase.Posix.cs" />
<Compile Include="TarTestsBase.Ustar.cs" />
<Compile Include="TarTestsBase.V7.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.Entry.Roundtrip.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.File.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.Base.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.Tests.cs" />
Expand Down
114 changes: 113 additions & 1 deletion src/libraries/System.Formats.Tar/tests/TarTestsBase.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
Expand Down Expand Up @@ -89,6 +89,11 @@ public abstract partial class TarTestsBase : FileCleanupTestBase
protected const string PaxEaSize = "size";
protected const string PaxEaDevMajor = "devmajor";
protected const string PaxEaDevMinor = "devminor";
internal const char OneByteCharacter = 'a';
internal const char TwoBytesCharacter = 'ö';
internal const string FourBytesCharacter = "😒";
internal const char Separator = '/';
internal const int MaxPathComponent = 255;

private static readonly string[] V7TestCaseNames = new[]
{
Expand Down Expand Up @@ -622,5 +627,112 @@ public static IEnumerable<object[]> GetPaxAndGnuTestCaseNames()
yield return new object[] { name };
}
}

private static List<string> GetPrefixes()
{
List<string> prefixes = new() { "", "/", "./", "../" };

if (OperatingSystem.IsWindows())
prefixes.Add("C:/");

return prefixes;
}

internal static IEnumerable<string> GetNamesPrefixedTestData(NameCapabilities max)
{
Assert.True(Enum.IsDefined(max));
List<string> prefixes = GetPrefixes();

foreach (string prefix in prefixes)
{
int nameLength = 100 - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, nameLength);
yield return prefix + Repeat(OneByteCharacter, nameLength - 2) + Repeat(TwoBytesCharacter, 1);
yield return prefix + Repeat(OneByteCharacter, nameLength - 4) + Repeat(FourBytesCharacter, 1);
}

if (max == NameCapabilities.Name)
yield break;

// test maxed out prefix.
foreach (string prefix in prefixes)
{
int directoryLength = 155 - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, directoryLength) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + Repeat(TwoBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + Repeat(FourBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, 100);
}

if (max == NameCapabilities.NameAndPrefix)
yield break;

foreach (string prefix in prefixes)
{
int directoryLength = MaxPathComponent - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, directoryLength) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + Repeat(TwoBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + Repeat(FourBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
}
}

internal static IEnumerable<string> GetNamesNonAsciiTestData(NameCapabilities max)
{
Assert.True(Enum.IsDefined(max));

yield return Repeat(OneByteCharacter, 100);
yield return Repeat(TwoBytesCharacter, 100 / 2);
yield return Repeat(OneByteCharacter, 2) + Repeat(TwoBytesCharacter, 49);

yield return Repeat(FourBytesCharacter, 100 / 4);
yield return Repeat(OneByteCharacter, 4) + Repeat(FourBytesCharacter, 24);

if (max == NameCapabilities.Name)
yield break;

// prefix + name
// this is 256 but is supported because prefix is not required to end in separator.
yield return Repeat(OneByteCharacter, 155) + Separator + Repeat(OneByteCharacter, 100);

// non-ascii prefix + name
yield return Repeat(TwoBytesCharacter, 155 / 2) + Separator + Repeat(OneByteCharacter, 100);
yield return Repeat(FourBytesCharacter, 155 / 4) + Separator + Repeat(OneByteCharacter, 100);

// prefix + non-ascii name
yield return Repeat(OneByteCharacter, 155) + Separator + Repeat(TwoBytesCharacter, 100 / 2);
yield return Repeat(OneByteCharacter, 155) + Separator + Repeat(FourBytesCharacter, 100 / 4);

// non-ascii prefix + non-ascii name
yield return Repeat(TwoBytesCharacter, 155 / 2) + Separator + Repeat(TwoBytesCharacter, 100 / 2);
yield return Repeat(FourBytesCharacter, 155 / 4) + Separator + Repeat(FourBytesCharacter, 100 / 4);

if (max == NameCapabilities.NameAndPrefix)
yield break;

// Pax and Gnu support unlimited paths.
yield return Repeat(OneByteCharacter, MaxPathComponent);
yield return Repeat(TwoBytesCharacter, MaxPathComponent / 2);
yield return Repeat(FourBytesCharacter, MaxPathComponent / 4);

yield return Repeat(OneByteCharacter, MaxPathComponent) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return Repeat(TwoBytesCharacter, MaxPathComponent / 2) + Separator + Repeat(TwoBytesCharacter, MaxPathComponent / 2);
yield return Repeat(FourBytesCharacter, MaxPathComponent / 4) + Separator + Repeat(FourBytesCharacter, MaxPathComponent / 4);
}

internal static string Repeat(char c, int count)
{
return new string(c, count);
}

internal static string Repeat(string c, int count)
{
return string.Concat(Enumerable.Repeat(c, count));
}

internal enum NameCapabilities
{
Name,
NameAndPrefix,
Unlimited
}
}
}
Loading