Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
e9fbcc9
Implement Tar APIs
carlossanlop Apr 13, 2022
0731cb2
Address some comment suggestions.
carlossanlop Apr 13, 2022
8f1f1af
Add SystemFormatsTarTestData package dependency entries and versions.
carlossanlop Apr 13, 2022
10df463
Merge the major and minor p/invokes into a single one.
carlossanlop Apr 14, 2022
da4af67
Address exception and nullability suggestions.
carlossanlop Apr 14, 2022
8710ceb
Adjust tests for the latest suggestions.
carlossanlop Apr 14, 2022
76e484c
Document elevation requirement to extract device files on Unix.
carlossanlop Apr 14, 2022
c9c4d85
Add tests to verify extraction without elevation throws for device fi…
carlossanlop Apr 14, 2022
e0dafda
Add separate expected mode for special files from assets (644).
carlossanlop Apr 14, 2022
5b8392b
Bump assets version to one with the fix
carlossanlop Apr 14, 2022
a16d4d9
Fix bugs and add tests for: timestamp conversion, extended attributes…
carlossanlop Apr 15, 2022
7ca674e
Validate entry.Name chars before extracting
carlossanlop Apr 15, 2022
560b789
Remove some minor todo comments
carlossanlop Apr 15, 2022
b3e5988
Add missing numeric value for PAL_S_IFCHR and its static assert
carlossanlop Apr 15, 2022
c22a8e7
Bug fixes:
carlossanlop Apr 16, 2022
d3e4f31
Bug fix: GNU can have two different metadata entries in a row if the …
carlossanlop Apr 16, 2022
23caa17
Add test to verify entry with subdir segments gets extracted by TarFi…
carlossanlop Apr 16, 2022
e0b0442
Rename UnknownFormat resource string to TarInvalidFormat, include for…
carlossanlop Apr 16, 2022
b682576
Bug fixes:
carlossanlop Apr 17, 2022
9fd01c3
Embed paths to exception message string when extracting file to diffe…
carlossanlop Apr 17, 2022
7b0d8c3
Bug fix: WriteEntry from file on Windows failing with V7 due to incom…
carlossanlop Apr 17, 2022
f1d9b7b
Bug fixes:
carlossanlop Apr 17, 2022
0980fbd
Use HAVE* in pal_io.c to detect library where makedev is available. D…
carlossanlop Apr 18, 2022
a28f3e5
Remove unnecessary TargetPlatformIdentifier override in src csproj.
carlossanlop Apr 18, 2022
7f2e516
Small refactor of VerifyOverwriteFileIsPossible
carlossanlop Apr 18, 2022
3f71b60
Remove parameter nullchecks.
carlossanlop Apr 18, 2022
cb855ed
Remove test method for attaching debugger.
carlossanlop Apr 18, 2022
dd40447
Remove devmajor and devminor duplicate todo message
carlossanlop Apr 18, 2022
9a28f12
Remove unused datetime method
carlossanlop Apr 19, 2022
40097c6
TMP exception with failed timestamp info
carlossanlop Apr 19, 2022
b1e8dfc
Remove failing device major/minor checks for now
carlossanlop Apr 19, 2022
c11e434
add FieldLocations static class, with position of first byte of each …
carlossanlop Apr 19, 2022
255ec96
Use InvariantCulture when converting the utf8 string to a double.
carlossanlop Apr 19, 2022
8ae7788
Copying file used by ExtractToDirectory test that throws, so it doesn…
carlossanlop Apr 19, 2022
d3cbdd2
Remove unused test method that opens and returns a filestream. We don…
carlossanlop Apr 19, 2022
6b5d078
Use a single rented buffer to read a whole header record. Convert rel…
carlossanlop Apr 19, 2022
19eb98d
Use ArrayPool for writing.
carlossanlop Apr 19, 2022
69ca3e7
Fix upper bound of DeviceMajor and DeviceMinor.
carlossanlop Apr 19, 2022
b1a2d5a
Remove async APIs from ref, comment them in src.
carlossanlop Apr 19, 2022
fb644dd
Add issue to TODO comments
carlossanlop Apr 19, 2022
cd40d62
Remove unused methods.
carlossanlop Apr 19, 2022
0ee2c33
Add InvariantCulture to double.ToString conversions.
carlossanlop Apr 19, 2022
58a3476
Fix test parsing a double, ensure it uses invariant culture.
carlossanlop Apr 20, 2022
fc0e568
More tests parsing a double, ensure invariant culture comparison.
carlossanlop Apr 20, 2022
5d81577
Verify link targets when extracting to directory. Disallow extracting…
carlossanlop Apr 20, 2022
a52dfe4
Avoid advancing stream anymore if end markers were found and GetNextE…
carlossanlop Apr 20, 2022
b227fa9
Add test that verifies stream is not advanced anymore after reading t…
carlossanlop Apr 20, 2022
5745b9f
Fix bug preventing correct generation of full path for extraction. Ad…
carlossanlop Apr 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Bug fix: GNU can have two different metadata entries in a row if the …
…entry has a long link and a long path. Add logic to handle that and tests to verify this.
  • Loading branch information
carlossanlop committed Apr 16, 2022
commit d3e4f3121c631009a6366faff3f187a7d011bddf
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@ namespace System.Formats.Tar
// Writes header attributes of a tar archive entry.
internal partial struct TarHeader
{
private const byte SpaceChar = 0x20;
private const byte EqualsChar = 0x3d;
private const byte NewLineChar = 0xa;
private static readonly byte[] s_paxMagic = new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, 0x0 }; // "ustar\0"
private static readonly byte[] s_paxVersion = new byte[] { 0x30, 0x30 }; // "00"
private static readonly byte[] s_paxVersion = new byte[] { TarHelpers.ZeroChar, TarHelpers.ZeroChar }; // "00"

private static readonly byte[] s_gnuMagic = new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, 0x20 }; // "ustar "
private static readonly byte[] s_gnuVersion = new byte[] { 0x20, 0x0 }; // " \0"
private static readonly byte[] s_gnuMagic = new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, TarHelpers.SpaceChar }; // "ustar "
private static readonly byte[] s_gnuVersion = new byte[] { TarHelpers.SpaceChar, 0x0 }; // " \0"

// Extended Attribute entries have a special format in the Name field:
// "{dirName}/PaxHeaders.{processId}/{fileName}{trailingSeparator}"
Expand All @@ -28,6 +25,9 @@ internal partial struct TarHeader
// "{tmpFolder}/GlobalHead.{processId}.1"
private const string GlobalHeadFormat = "{0}/GlobalHead.{1}.1";

// Predefined text for the Name field of a GNU long metadata entry. Applies for both LongPath ('L') and LongLink ('K').
private const string GnuLongMetadataName = "././@LongLink";

// Creates a PAX Global Extended Attributes header and writes it into the specified archive stream.
internal static void WriteGlobalExtendedAttributesHeader(Stream archiveStream, IEnumerable<KeyValuePair<string, string>> globalExtendedAttributes)
{
Expand Down Expand Up @@ -135,6 +135,7 @@ internal void WriteAsUstar(Stream archiveStream)
}

// Writes the current header as a PAX entry into the archive stream.
// Makes sure to add the preceding exteded attributes entry before the actual entry.
internal void WriteAsPax(Stream archiveStream)
{
// First, we write the preceding extended attributes header
Expand All @@ -148,8 +149,52 @@ internal void WriteAsPax(Stream archiveStream)
WriteAsPaxInternal(archiveStream);
}

// Writes the current header as a GNU entry into the archive stream.
// Writes the current header as a Gnu entry into the archive stream.
// Makes sure to add the preceding LongLink and/or LongPath entries if necessary, before the actual entry.
internal void WriteAsGnu(Stream archiveStream)
{
// First, we determine if we need a preceding LongLink, and write it if needed
if (_linkName.Length > FieldLengths.LinkName)
{
TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName);
longLinkHeader.WriteAsGnuInternal(archiveStream);
}

// Second, we determine if we need a preceding LongPath, and write it if needed
if (_name.Length > FieldLengths.Name)
{
TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name);
longPathHeader.WriteAsGnuInternal(archiveStream);
}

// Third, we write this header as a normal one
WriteAsGnuInternal(archiveStream);
}

// Creates and returns a GNU long metadata header, with the specified long text written into its data stream.
private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText)
{
Debug.Assert((entryType is TarEntryType.LongPath && longText.Length > FieldLengths.Name) ||
(entryType is TarEntryType.LongLink && longText.Length > FieldLengths.LinkName));

TarHeader longMetadataHeader = default;

longMetadataHeader._name = GnuLongMetadataName; // Same name for both longpath or longlink
longMetadataHeader._mode = (int)TarHelpers.DefaultMode;
longMetadataHeader._uid = 0;
longMetadataHeader._gid = 0;
longMetadataHeader._mTime = DateTimeOffset.MinValue; // 0
longMetadataHeader._typeFlag = entryType;

longMetadataHeader._dataStream = new MemoryStream();
longMetadataHeader._dataStream.Write(Encoding.UTF8.GetBytes(longText));
Comment on lines +140 to +141
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adamsitnik, an example for where a StringStream would be useful.

longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning

return longMetadataHeader;
}

// Writes the current header as a GNU entry into the archive stream.
internal void WriteAsGnuInternal(Stream archiveStream)
{
byte[] nameBytes = new byte[FieldLengths.Name];
byte[] modeBytes = new byte[FieldLengths.Mode];
Expand Down Expand Up @@ -524,11 +569,11 @@ private static byte[] GenerateExtendedAttributeKeyValuePairAsByteArray(byte[] ke
List<byte> bytesList = new();

bytesList.AddRange(finalTotalCharCountBytes);
bytesList.Add(SpaceChar);
bytesList.Add(TarHelpers.SpaceChar);
bytesList.AddRange(keyBytes);
bytesList.Add(EqualsChar);
bytesList.Add(TarHelpers.EqualsChar);
bytesList.AddRange(valueBytes);
bytesList.Add(NewLineChar);
bytesList.Add(TarHelpers.NewLineChar);

Debug.Assert(bytesList.Count == (realTotalCharCount + suffixByteCount));

Expand All @@ -542,12 +587,12 @@ internal static int SaveChecksumBytes(int checksum, Span<byte> destination)
{
// The checksum field is also counted towards the total sum
// but as an array filled with spaces
checksum += SpaceChar * 8;
checksum += TarHelpers.SpaceChar * 8;

byte[] converted = TarHelpers.GetAsciiBytes(TarHelpers.ConvertDecimalToOctal(checksum));

// Checksum field ends with a null and a space
destination[^1] = SpaceChar; // ' '
destination[^1] = TarHelpers.SpaceChar; // ' '
destination[^2] = 0; // '\0'

int i = destination.Length - 3;
Expand All @@ -562,7 +607,7 @@ internal static int SaveChecksumBytes(int checksum, Span<byte> destination)
}
else
{
destination[i] = 0x30; // Leading zero chars '0'
destination[i] = TarHelpers.ZeroChar; // Leading zero chars '0'
}
i--;
}
Expand Down Expand Up @@ -616,7 +661,7 @@ private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan<byte> bytes
}
else
{
destination[i] = ZeroChar; // leading zeros
destination[i] = TarHelpers.ZeroChar; // leading zeros
}
checksum += destination[i];
i--;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ namespace System.Formats.Tar
// Documentation: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
internal partial struct TarHeader
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this struct seems to be very large and I would not expect it to get us some perf. What are we getting by keeping it a struct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you prefer if I use class? And maybe split it into internal class TarHeaderRead and internal class TarHeaderWrite?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I expect us to get no perf benefits from keeping it struct, while we can run into issues like modifying a copy instead:

TarHeader header = new();
header._mode = 1;

SetMode(header);

Console.WriteLine(header._mode); // prints 1 as we have passed struct copy to SetMode and modified a copy

static void SetMode(TarHeader h) => h._mode = 2;

{
// POSIX fields (shared by Ustar and PAX)
private const string UstarMagic = "ustar\0";
private const string UstarVersion = "00";

// GNU-specific fields
private const string GnuMagic = "ustar ";
private const string GnuVersion = " \0";

Expand All @@ -35,9 +38,6 @@ internal partial struct TarHeader
private const string PaxEaDevMajor = "devmajor";
private const string PaxEaDevMinor = "devminor";

private const int ZeroChar = 0x30;

//private TarBlocks _blocks;
internal Stream? _dataStream;

// Position in the stream where the data ends in this header.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ internal static class TarHelpers
internal const short RecordSize = 512;
internal const int MaxBufferLength = 4096;

internal const int ZeroChar = 0x30;
internal const byte SpaceChar = 0x20;
internal const byte EqualsChar = 0x3d;
internal const byte NewLineChar = 0xa;

internal const TarFileMode DefaultMode = // 644 in octal
TarFileMode.UserRead | TarFileMode.UserWrite | TarFileMode.GroupRead | TarFileMode.OtherRead;

Expand Down
172 changes: 124 additions & 48 deletions src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -260,60 +260,24 @@ private bool TryGetNextEntryHeader(out TarHeader header, bool copyData)
}

// If a metadata typeflag entry is retrieved, handle it here, then read the next entry
if (header._typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.LongLink or TarEntryType.LongPath)
{
TarHeader actualEntryHeader = default;

// We should know by now the format of the archive based on the first retrieved entry
actualEntryHeader._format = header._format;

// Now get the actual entry
if (!actualEntryHeader.TryGetNextHeader(_archiveStream, copyData))
// PAX metadata
if (header._typeFlag is TarEntryType.ExtendedAttributes)
{
if (!TryProcessExtendedAttributesHeader(header, copyData, out TarHeader mainHeader))
{
return false;
}

// Should never read a GEA entry at this point
if (header._typeFlag == TarEntryType.GlobalExtendedAttributes)
{
throw new FormatException(SR.TarTooManyGlobalExtendedAttributesEntries);
}

// Can't have two metadata entries in a row, no matter the archive format
if (actualEntryHeader._typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.LongLink or TarEntryType.LongPath)
{
throw new FormatException(string.Format(SR.TarUnexpectedMetadataEntry, actualEntryHeader._typeFlag, header._typeFlag));
}

// Handle metadata entry types
switch (header._typeFlag)
header = mainHeader;
}
// GNU metadata
else if (header._typeFlag is TarEntryType.LongLink or TarEntryType.LongPath)
{
if (!TryProcessGnuMetadataHeader(header, copyData, out TarHeader mainHeader))
{
case TarEntryType.ExtendedAttributes: // pax
Debug.Assert(header._extendedAttributes != null);
if (GlobalExtendedAttributes != null)
{
// First, replace some of the entry's standard attributes with the global ones
actualEntryHeader.ReplaceNormalAttributesWithGlobalExtended(GlobalExtendedAttributes);
}
// Then replace all the standard attributes with the extended attributes ones,
// overwriting the previous global replacements if needed
actualEntryHeader.ReplaceNormalAttributesWithExtended(header._extendedAttributes);
break;

case TarEntryType.LongLink: // gnu
Debug.Assert(header._linkName != null);
// Replace with longer, complete path
actualEntryHeader._linkName = header._linkName;
break;

case TarEntryType.LongPath: // gnu
Debug.Assert(header._name != null);
// Replace with longer, complete path
actualEntryHeader._name = header._name;
break;
return false;
}

header = actualEntryHeader;
header = mainHeader;
}

// Common fields should always acquire a value
Expand All @@ -330,6 +294,118 @@ private bool TryGetNextEntryHeader(out TarHeader header, bool copyData)
return true;
}

private bool TryProcessExtendedAttributesHeader(TarHeader firstHeader, bool copyData, out TarHeader secondHeader)
{
secondHeader = default;
secondHeader._format = TarFormat.Pax;

// Now get the actual entry
if (!secondHeader.TryGetNextHeader(_archiveStream, copyData))
{
return false;
}

// Should never read a GEA entry at this point
if (secondHeader._typeFlag == TarEntryType.GlobalExtendedAttributes)
{
throw new FormatException(SR.TarTooManyGlobalExtendedAttributesEntries);
}

// Can't have two metadata entries in a row, no matter the archive format
if (secondHeader._typeFlag is TarEntryType.ExtendedAttributes)
{
throw new FormatException(string.Format(SR.TarUnexpectedMetadataEntry, TarEntryType.ExtendedAttributes, TarEntryType.ExtendedAttributes));
}

Debug.Assert(firstHeader._extendedAttributes != null);
if (GlobalExtendedAttributes != null)
{
// First, replace some of the entry's standard attributes with the global ones
secondHeader.ReplaceNormalAttributesWithGlobalExtended(GlobalExtendedAttributes);
}
// Then replace all the standard attributes with the extended attributes ones,
// overwriting the previous global replacements if needed
secondHeader.ReplaceNormalAttributesWithExtended(firstHeader._extendedAttributes);

return true;
}

private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out TarHeader finalHeader)
{
finalHeader = default;

TarHeader secondHeader = default;
secondHeader._format = TarFormat.Gnu;

// Get the second entry, which is the actual entry
if (!secondHeader.TryGetNextHeader(_archiveStream, copyData))
{
return false;
}

// Can't have two identical metadata entries in a row
if (secondHeader._typeFlag == header._typeFlag)
{
throw new FormatException(string.Format(SR.TarUnexpectedMetadataEntry, secondHeader._typeFlag, header._typeFlag));
}

// It's possible to have the two different metadata entries in a row
if ((header._typeFlag is TarEntryType.LongLink && secondHeader._typeFlag is TarEntryType.LongPath) ||
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
{
TarHeader thirdHeader = default;
thirdHeader._format = TarFormat.Gnu;

// Get the third entry, which is the actual entry
if (!thirdHeader.TryGetNextHeader(_archiveStream, copyData))
{
return false;
}

// Can't have three GNU metadata entries in a row
if (thirdHeader._typeFlag is TarEntryType.LongLink or TarEntryType.LongPath)
{
throw new FormatException(string.Format(SR.TarUnexpectedMetadataEntry, thirdHeader._typeFlag, secondHeader._typeFlag));
}

if (header._typeFlag is TarEntryType.LongLink)
{
Debug.Assert(header._linkName != null);
Debug.Assert(secondHeader._name != null);

thirdHeader._linkName = header._linkName;
thirdHeader._name = secondHeader._name;
}
else if (header._typeFlag is TarEntryType.LongPath)
{
Debug.Assert(header._name != null);
Debug.Assert(secondHeader._linkName != null);
thirdHeader._name = header._name;
thirdHeader._linkName = secondHeader._linkName;
}

finalHeader = thirdHeader;
}
// Only one metadata entry was found
else
{
if (header._typeFlag is TarEntryType.LongLink)
{
Debug.Assert(header._linkName != null);
secondHeader._linkName = header._linkName;
}
else if (header._typeFlag is TarEntryType.LongPath)
{
Debug.Assert(header._name != null);
secondHeader._name = header._name;
}

finalHeader = secondHeader;
}

return true;
}

// If the current entry contains a non-null DataStream, that stream gets added to an internal
// list of streams that need to be disposed when this TarReader instance gets disposed.
private void PreserveDataStreamForDisposalIfNeeded(TarEntry entry)
Expand Down
Loading