Skip to content
Prev Previous commit
Next Next commit
Fix truncation and prefix logic
  • Loading branch information
jozkee authored and github-actions committed Sep 28, 2022
commit d774bf0d9ca18a94de6042bf4145826c86cf2a95
4 changes: 2 additions & 2 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@
<data name="TarInvalidNumber" xml:space="preserve">
<value>Unable to parse number.</value>
</data>
<data name="TarEntryNameExceedsMaxLength" xml:space="preserve">
<value>The name exceeds the maximum allowed length for this format.</value>
<data name="TarEntryFieldExceedsMaxLength" xml:space="preserve">
<value>The field '{0}' exceeds the maximum allowed length for this format.</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
using System.Buffers.Text;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Numerics;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -363,79 +361,87 @@ private void WriteAsPaxSharedInternal(Span<byte> buffer, out long actualLength)
// Writes the entry name into the 'name' field of the header buffer and returns the value
// reported by WriteAsUtf8String for the bytes written.
// When the UTF-8 encoded name does not fit in FieldLengths.Name bytes:
// - V7 throws ArgumentException, as it has no other place to store a longer name.
// - Every other format writes only the leading characters that fit in the field.
private int WriteName(Span<byte> buffer)
{
    ReadOnlySpan<char> name = _name;
    int encodedLength = GetUtf8TextLength(name);

    if (encodedLength > FieldLengths.Name)
    {
        if (_format is TarEntryFormat.V7)
        {
            throw new ArgumentException(SR.Format(SR.TarEntryFieldExceedsMaxLength, nameof(TarEntry.Name)), ArgNameEntry);
        }

        // Truncate on a UTF-16 boundary so that the encoded result never exceeds the field size.
        int utf16NameTruncatedLength = GetUtf16TruncatedTextLength(name, FieldLengths.Name);
        name = name.Slice(0, utf16NameTruncatedLength);
    }

    return WriteAsUtf8String(name, buffer.Slice(FieldLocations.Name, FieldLengths.Name));
}

// 'https://www.freebsd.org/cgi/man.cgi?tar(5)'
// If the pathname is too long to fit in the 100 bytes provided by the standard format,
// it can be split at any / character with the first portion going into the prefix field.
//
// Writes the entry name into the 'name' field and, when it does not fit there, splits it at a
// directory separator between the 'prefix' and 'name' fields.
// Returns the sum of the checksums reported by WriteLeftAlignedBytesAndGetChecksum.
// Throws ArgumentException when no split makes both portions fit in their fields.
private int WriteUstarName(Span<byte> buffer)
{
    // We can have a pathname as big as 256, prefix + '/' + name,
    // the separator in between can be neglected as the reader will append it when it joins both fields.
    const int MaxPathname = FieldLengths.Prefix + FieldLengths.Name + 1;

    if (GetUtf8TextLength(_name) > MaxPathname)
    {
        // The resource string contains a '{0}' placeholder, so it must go through SR.Format.
        throw new ArgumentException(SR.Format(SR.TarEntryFieldExceedsMaxLength, nameof(TarEntry.Name)), ArgNameEntry);
    }

    Span<byte> encodingBuffer = stackalloc byte[MaxPathname];
    int encoded = Encoding.UTF8.GetBytes(_name, encodingBuffer);
    ReadOnlySpan<byte> pathnameBytes = encodingBuffer.Slice(0, encoded);

    // If the pathname is able to fit in Name, we can write it down there and avoid calculating Prefix.
    if (pathnameBytes.Length <= FieldLengths.Name)
    {
        return WriteLeftAlignedBytesAndGetChecksum(pathnameBytes, buffer.Slice(FieldLocations.Name, FieldLengths.Name));
    }

    int lastIdx = pathnameBytes.LastIndexOfAny(PathInternal.Utf8DirectorySeparators);
    scoped ReadOnlySpan<byte> name;
    scoped ReadOnlySpan<byte> prefix;

    if (lastIdx < 1) // splitting at the root is not allowed.
    {
        name = pathnameBytes;
        prefix = default;
    }
    else
    {
        name = pathnameBytes.Slice(lastIdx + 1);
        prefix = pathnameBytes.Slice(0, lastIdx);
    }

    // At this point pathnameBytes.Length > 100.
    // Keep moving the split point towards the start while the prefix is still too long for its field;
    // each step shortens the prefix and lengthens the name.
    while (prefix.Length > FieldLengths.Prefix)
    {
        lastIdx = prefix.LastIndexOfAny(PathInternal.Utf8DirectorySeparators);
        if (lastIdx < 1)
        {
            // No earlier separator to split at (or only the root one, which cannot be a split point).
            break;
        }

        name = pathnameBytes.Slice(lastIdx + 1);
        prefix = pathnameBytes.Slice(0, lastIdx);
    }

    if (prefix.Length <= FieldLengths.Prefix && name.Length <= FieldLengths.Name)
    {
        // The prefix must never be just a separator, since the split never happens at index 0.
        Debug.Assert(prefix.Length != 1 || !PathInternal.Utf8DirectorySeparators.Contains(prefix[0]));

        int checksum = WriteLeftAlignedBytesAndGetChecksum(prefix, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
        checksum += WriteLeftAlignedBytesAndGetChecksum(name, buffer.Slice(FieldLocations.Name, FieldLengths.Name));

        return checksum;
    }
    else
    {
        throw new ArgumentException(SR.Format(SR.TarEntryFieldExceedsMaxLength, nameof(TarEntry.Name)), ArgNameEntry);
    }
}

Expand Down Expand Up @@ -477,7 +483,20 @@ private int WriteCommonFields(Span<byte> buffer, long actualLength, TarEntryType

if (!string.IsNullOrEmpty(_linkName))
{
checksum += WriteAsUtf8String(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName));
ReadOnlySpan<char> linkName = _linkName;

if (GetUtf8TextLength(linkName) > FieldLengths.LinkName)
{
if (_format is not TarEntryFormat.Pax and not TarEntryFormat.Gnu)
{
throw new ArgumentException(SR.Format(SR.TarEntryFieldExceedsMaxLength, nameof(TarEntry.LinkName)), ArgNameEntry);
}

int truncatedLength = GetUtf16TruncatedTextLength(linkName, FieldLengths.LinkName);
linkName = linkName.Slice(0, truncatedLength);
}

checksum += WriteAsUtf8String(linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName));
}

return checksum;
Expand Down Expand Up @@ -521,12 +540,37 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)

if (!string.IsNullOrEmpty(_uName))
{
checksum += WriteAsUtf8String(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName));
ReadOnlySpan<char> uName = _uName;

if (GetUtf8TextLength(uName) > FieldLengths.UName)
{
if (_format is not TarEntryFormat.Pax)
{
throw new ArgumentException(SR.Format(SR.TarEntryFieldExceedsMaxLength, nameof(PaxTarEntry.UserName)), ArgNameEntry);
}

int truncatedLength = GetUtf16TruncatedTextLength(uName, FieldLengths.UName);
uName = uName.Slice(0, truncatedLength);
}

checksum += WriteAsUtf8String(uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName));
}

// Write the group name, if any, into the 'gname' field and add its checksum.
if (!string.IsNullOrEmpty(_gName))
{
    ReadOnlySpan<char> gName = _gName;

    if (GetUtf8TextLength(gName) > FieldLengths.GName)
    {
        // Only Pax is allowed to write a truncated value here; other formats reject it.
        if (_format is not TarEntryFormat.Pax)
        {
            throw new ArgumentException(SR.Format(SR.TarEntryFieldExceedsMaxLength, nameof(PaxTarEntry.GroupName)), ArgNameEntry);
        }

        // Truncate to the size of the GName field being written.
        int truncatedLength = GetUtf16TruncatedTextLength(gName, FieldLengths.GName);
        gName = gName.Slice(0, truncatedLength);
    }

    checksum += WriteAsUtf8String(gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName));
}

if (_devMajor > 0)
Expand Down Expand Up @@ -821,10 +865,10 @@ private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destina
}

// Writes the specified text as a UTF8 string aligned to the left, and returns its checksum.
// The text is encoded directly into 'buffer', so no intermediate byte array is allocated.
// Callers are expected to have checked or truncated the text so that its encoded form fits in 'buffer';
// Encoding.GetBytes throws ArgumentException when the destination is too small.
private static int WriteAsUtf8String(ReadOnlySpan<char> text, Span<byte> buffer)
{
    int encoded = Encoding.UTF8.GetBytes(text, buffer);
    return WriteLeftAlignedBytesAndGetChecksum(buffer.Slice(0, encoded), buffer);
}

// Gets the special name for the 'name' field in an extended attribute entry.
Expand Down Expand Up @@ -874,15 +918,12 @@ private static string GenerateGlobalExtendedAttributeName(int globalExtendedAttr
return result;
}

// Returns the number of bytes needed to encode the specified text as UTF8.
private static int GetUtf8TextLength(ReadOnlySpan<char> text)
{
    int byteCount = Encoding.UTF8.GetByteCount(text);
    return byteCount;
}

// Returns the text's utf16 length truncated at the specified utf8 max length.
private static int GetUtf16TruncatedTextLength(ReadOnlySpan<char> text, int utf8MaxLength)
{
// fast path, most entries will be smaller than maxLength.
if (Encoding.UTF8.GetByteCount(text) <= utf8MaxLength)
{
return text.Length;
}

int utf8Length = 0;
int utf16TruncatedLength = 0;

Expand Down
32 changes: 20 additions & 12 deletions src/libraries/System.Formats.Tar/tests/TarTestsBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ public static IEnumerable<object[]> GetPaxAndGnuTestCaseNames()

private static List<string> GetPrefixes()
{
List<string> prefixes = new() { "", "/", "./", "../" };
List<string> prefixes = new() { "", "/a/", "./", "../" };

if (OperatingSystem.IsWindows())
prefixes.Add("C:/");
Expand All @@ -643,13 +643,17 @@ internal static IEnumerable<string> GetNamesPrefixedTestData(NameCapabilities ma
Assert.True(Enum.IsDefined(max));
List<string> prefixes = GetPrefixes();

// prefix + name of length 100
foreach (string prefix in prefixes)
{
// prefix + name of length 100
int nameLength = 100 - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, nameLength);
yield return prefix + Repeat(OneByteCharacter, nameLength - 2) + Repeat(TwoBytesCharacter, 1);
yield return prefix + Repeat(OneByteCharacter, nameLength - 4) + Repeat(FourBytesCharacter, 1);
yield return prefix + Repeat(OneByteCharacter, nameLength - 2) + TwoBytesCharacter;
yield return prefix + Repeat(OneByteCharacter, nameLength - 4) + FourBytesCharacter;

// prefix alone
if (prefix != string.Empty)
yield return prefix;
}

if (max == NameCapabilities.Name)
Expand All @@ -659,17 +663,17 @@ internal static IEnumerable<string> GetNamesPrefixedTestData(NameCapabilities ma
foreach (string prefix in prefixes)
{
yield return prefix + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, 100 - 2) + Repeat(TwoBytesCharacter, 1);
yield return prefix + Repeat(OneByteCharacter, 100 - 4) + Repeat(FourBytesCharacter, 1);
yield return prefix + Repeat(OneByteCharacter, 100 - 2) + TwoBytesCharacter;
yield return prefix + Repeat(OneByteCharacter, 100 - 4) + FourBytesCharacter;
}

// maxed out prefix and name.
foreach (string prefix in prefixes)
{
int directoryLength = 155 - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, directoryLength) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + Repeat(TwoBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + Repeat(FourBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + TwoBytesCharacter + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + FourBytesCharacter + Separator + Repeat(OneByteCharacter, 100);
}

if (max == NameCapabilities.NameAndPrefix)
Expand All @@ -679,8 +683,8 @@ internal static IEnumerable<string> GetNamesPrefixedTestData(NameCapabilities ma
{
int directoryLength = MaxPathComponent - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, directoryLength) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + Repeat(TwoBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + Repeat(FourBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + TwoBytesCharacter + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + FourBytesCharacter + Separator + Repeat(OneByteCharacter, MaxPathComponent);
}
}

Expand Down Expand Up @@ -730,6 +734,10 @@ internal static IEnumerable<string> GetNamesNonAsciiTestData(NameCapabilities ma
internal static IEnumerable<string> GetTooLongNamesTestData(NameCapabilities max)
{
Assert.True(max is NameCapabilities.Name or NameCapabilities.NameAndPrefix);

// root directory can't be saved as prefix
yield return "/" + Repeat(OneByteCharacter, 100);

List<string> prefixes = GetPrefixes();

// 1. non-ascii last character doesn't fit in name.
Expand All @@ -754,8 +762,8 @@ internal static IEnumerable<string> GetTooLongNamesTestData(NameCapabilities max
yield return Repeat(OneByteCharacter, 155 - 4) + Repeat(FourBytesCharacter, 2) + Separator + maxedOutName;

// 2.2 last char doesn't fit by one byte.
yield return Repeat(OneByteCharacter, 155 - 2 + 1) + Repeat(TwoBytesCharacter, 1) + Separator + maxedOutName;
yield return Repeat(OneByteCharacter, 155 - 4 + 1) + Repeat(FourBytesCharacter, 1) + Separator + maxedOutName;
yield return Repeat(OneByteCharacter, 155 - 2 + 1) + TwoBytesCharacter + Separator + maxedOutName;
yield return Repeat(OneByteCharacter, 155 - 4 + 1) + FourBytesCharacter + Separator + maxedOutName;

if (max is NameCapabilities.NameAndPrefix)
yield break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ private TarEntry CreateTarEntryAndGetExpectedChecksum(TarEntryFormat format, Tar

if (entryType is TarEntryType.SymbolicLink)
{
expectedChecksum += GetLinkChecksum(longLink, out string linkName);
expectedChecksum += GetLinkChecksum(format, longLink, out string linkName);
entry.LinkName = linkName;
}

Expand All @@ -199,7 +199,9 @@ private int GetNameChecksum(TarEntryFormat format, bool longPath, out string ent

// V7 does not support name fields larger than 100
if (format is not TarEntryFormat.V7)
{
entryName += "/" + new string('a', 50);
}

// Gnu and Pax writes first 100 bytes in 'name' field, then the full name is written in a metadata entry that precedes this one.
if (format is TarEntryFormat.Ustar)
Expand All @@ -211,7 +213,7 @@ private int GetNameChecksum(TarEntryFormat format, bool longPath, out string ent
return expectedChecksum;
}

private int GetLinkChecksum(bool longLink, out string linkName)
private int GetLinkChecksum(TarEntryFormat format, bool longLink, out string linkName)
{
int expectedChecksum = 0;
if (!longLink)
Expand All @@ -222,12 +224,16 @@ private int GetLinkChecksum(bool longLink, out string linkName)
}
else
{
linkName = new string('a', 150);
// 100 * 97 = 9700 (first 100 bytes go into 'linkName' field)
linkName = new string('a', 100); // 100 * 97 = 9700 (first 100 bytes go into 'linkName' field)
expectedChecksum += 9700;
// - V7 and Ustar ignore the rest of the bytes
// - Pax and Gnu write first 100 bytes in 'linkName' field, then the full link name is written in the

// V7 and Ustar do not support link name fields larger than 100
// Pax and Gnu write first 100 bytes in 'linkName' field, then the full link name is written in the
// preceding metadata entry (extended attributes for PAX, LongLink for GNU).
if (format is not TarEntryFormat.V7 and not TarEntryFormat.Ustar)
{
linkName += "/" + new string('a', 50);
}
}
return expectedChecksum;

Expand Down
Loading