Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d52d836
Avoid unnecessary byte[] allocations
stephentoub Aug 19, 2022
1e5020e
Remove unnecessary use of FileStreamOptions
stephentoub Aug 19, 2022
6938c77
Clean up Dispose{Async} implementations
stephentoub Aug 19, 2022
5fa03e9
Clean up unnecessary consts
stephentoub Aug 19, 2022
8dd0ac1
Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary a…
stephentoub Aug 19, 2022
5be57ad
Avoid string allocations in ReadMagicAttribute
stephentoub Aug 19, 2022
ab71e6c
Avoid allocation in WriteAsOctal
stephentoub Aug 19, 2022
df2d742
Improve handling of octal
stephentoub Aug 19, 2022
c6058bd
Avoid allocation for version string
stephentoub Aug 19, 2022
5756a8c
Removing boxing and char string allocation in GenerateExtendedAttribu…
stephentoub Aug 19, 2022
9539a4a
Fix a couple unnecessary dictionary lookups
stephentoub Aug 19, 2022
74bbc9c
Replace Enum.HasFlag usage
stephentoub Aug 19, 2022
46e0855
Remove allocations from Write{Posix}Name
stephentoub Aug 19, 2022
02ca7da
Replace ArrayPool use with string.Create
stephentoub Aug 19, 2022
f9eb99f
Replace more superfluous ArrayPool usage
stephentoub Aug 19, 2022
add6179
Remove ArrayPool use from System.IO.Compression.ZipFile
stephentoub Aug 20, 2022
6f8cb75
Fix inverted condition
stephentoub Aug 20, 2022
827a588
Use generic math to parse octal
stephentoub Aug 20, 2022
ae21478
Remove allocations from StringReader and string.Split
stephentoub Aug 20, 2022
d6b6727
Remove magic string allocation for Ustar when not V7
stephentoub Aug 20, 2022
480af5c
Remove file name and directory name allocation in GenerateExtendedAtt…
stephentoub Aug 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Replace more superfluous ArrayPool usage
  • Loading branch information
stephentoub committed Aug 19, 2022
commit f9eb99f6d10079b4b0af9464d7c1d0fea5582f21
Original file line number Diff line number Diff line change
Expand Up @@ -535,13 +535,20 @@ private void ReadExtendedAttributesBlock(Stream archiveStream)
if (_size != 0)
{
ValidateSize();
byte[] buffer = ArrayPool<byte>.Shared.Rent((int)_size);
Span<byte> span = buffer.AsSpan(0, (int)_size);

byte[]? buffer = null;
Span<byte> span = _size <= 256 ?
stackalloc byte[256] :
(buffer = ArrayPool<byte>.Shared.Rent((int)_size));
span = span.Slice(0, (int)_size);

archiveStream.ReadExactly(span);
ReadExtendedAttributesFromBuffer(span, _name);

ArrayPool<byte>.Shared.Return(buffer);
if (buffer is not null)
{
ArrayPool<byte>.Shared.Return(buffer);
}
}
}

Expand Down Expand Up @@ -600,13 +607,20 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream)
if (_size != 0)
{
ValidateSize();
byte[] buffer = ArrayPool<byte>.Shared.Rent((int)_size);
Span<byte> span = buffer.AsSpan(0, (int)_size);

byte[]? buffer = null;
Span<byte> span = _size <= 256 ?
stackalloc byte[256] :
(buffer = ArrayPool<byte>.Shared.Rent((int)_size));
span = span.Slice(0, (int)_size);

archiveStream.ReadExactly(span);
ReadGnuLongPathDataFromBuffer(span);

ArrayPool<byte>.Shared.Return(buffer);
if (buffer is not null)
{
ArrayPool<byte>.Shared.Return(buffer);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -531,8 +531,8 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream
if (paddingAfterData != 0)
{
byte[] buffer = ArrayPool<byte>.Shared.Rent(paddingAfterData);

Array.Clear(buffer, 0, paddingAfterData);

await archiveStream.WriteAsync(buffer.AsMemory(0, paddingAfterData), cancellationToken).ConfigureAwait(false);

ArrayPool<byte>.Shared.Return(buffer);
Expand All @@ -542,9 +542,11 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream
// Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes.
private static Stream? GenerateExtendedAttributesDataStream(Dictionary<string, string> extendedAttributes)
{
byte[]? buffer = null;
MemoryStream? dataStream = null;

byte[]? buffer = null;
Span<byte> span = stackalloc byte[512];

if (extendedAttributes.Count > 0)
{
dataStream = new MemoryStream();
Expand All @@ -561,26 +563,27 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream
length += CountDigits(length);
Copy link
Member Author

@stephentoub stephentoub Aug 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was meant to model the logic previously there, but I think this logic needs to be tweaked, and it should be more like:

int digitCount = CountDigits(length) ;
length += digitCount;
length += CountDigits(length) - digitCount; // account for possible digit length increase

Do we have tests for this stuff? Do we validate that the archives at produce are readable by other tools? Our own code appears to ignore this length when reading archives (maybe it shouldn't?)

Copy link
Member Author

@stephentoub stephentoub Aug 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we even sure the length is supposed to include itself? Have you seen that being done in archives produced by other tools? That would be a very strange format design.

Copy link
Contributor

@carlossanlop carlossanlop Aug 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From man tar 5:

The extended attributes themselves are stored as a series of text-format lines encoded in the portable UTF-8 encoding. Each line consists of a decimal number, a space, a key string, an equals sign, a value string, and a new line. The decimal number indicates the length of the entire line, including the initial length field and the trailing newline. An example of such a field is:

25 ctime=1084839148.1212\n

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it's strange design.

I did verify that the tar tool was able to read our archives containing extended attributes entries.

Since we know the length of the data section, I didn't feel it was too important to verify that the length of each extended attribute entry was correct, considering that we need to look for a mandatory newline char suffix. Maybe if we checked the length number and advanced the position instead of searching for the newline, we could improve perf a bit. I can investigate if this is true and determine if we need to submit a PR to verify the length number.


// Get a large enough buffer if we don't already have one.
if (buffer is null || buffer.Length < length)
if (span.Length < length)
{
if (buffer is not null)
{
ArrayPool<byte>.Shared.Return(buffer);
}
buffer = ArrayPool<byte>.Shared.Rent(length);
span = buffer;
}

// Format the contents.
bool formatted = Utf8Formatter.TryFormat(length, buffer, out int bytesWritten);
bool formatted = Utf8Formatter.TryFormat(length, span, out int bytesWritten);
Debug.Assert(formatted);
buffer[bytesWritten++] = (byte)' ';
bytesWritten += Encoding.UTF8.GetBytes(attribute, buffer.AsSpan(bytesWritten));
buffer[bytesWritten++] = (byte)'=';
bytesWritten += Encoding.UTF8.GetBytes(value, buffer.AsSpan(bytesWritten));
buffer[bytesWritten++] = (byte)'\n';
span[bytesWritten++] = (byte)' ';
bytesWritten += Encoding.UTF8.GetBytes(attribute, span.Slice(bytesWritten));
span[bytesWritten++] = (byte)'=';
bytesWritten += Encoding.UTF8.GetBytes(value, span.Slice(bytesWritten));
span[bytesWritten++] = (byte)'\n';

// Write it to the stream.
dataStream.Write(buffer.AsSpan(0, bytesWritten));
dataStream.Write(span.Slice(0, bytesWritten));
}

dataStream.Position = 0; // Ensure it gets written into the archive from the beginning
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,40 +268,37 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken
// Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive.
private void WriteEntryInternal(TarEntry entry)
{
byte[] rented = ArrayPool<byte>.Shared.Rent(minimumLength: TarHelpers.RecordSize);
Span<byte> buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger
buffer.Clear(); // Rented arrays aren't clean
try
{
switch (entry.Format)
{
case TarEntryFormat.V7:
entry._header.WriteAsV7(_archiveStream, buffer);
break;
case TarEntryFormat.Ustar:
entry._header.WriteAsUstar(_archiveStream, buffer);
break;
case TarEntryFormat.Pax:
if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes)
{
entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++);
}
else
{
entry._header.WriteAsPax(_archiveStream, buffer);
}
break;
case TarEntryFormat.Gnu:
entry._header.WriteAsGnu(_archiveStream, buffer);
break;
default:
Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler");
throw new FormatException(string.Format(SR.TarInvalidFormat, Format));
}
}
finally
Span<byte> buffer = stackalloc byte[TarHelpers.RecordSize];
buffer.Clear();

switch (entry.Format)
{
ArrayPool<byte>.Shared.Return(rented);
case TarEntryFormat.V7:
entry._header.WriteAsV7(_archiveStream, buffer);
break;

case TarEntryFormat.Ustar:
entry._header.WriteAsUstar(_archiveStream, buffer);
break;

case TarEntryFormat.Pax:
if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes)
{
entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++);
}
else
{
entry._header.WriteAsPax(_archiveStream, buffer);
}
break;

case TarEntryFormat.Gnu:
entry._header.WriteAsGnu(_archiveStream, buffer);
break;

default:
Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler");
throw new FormatException(string.Format(SR.TarInvalidFormat, Format));
}

_wroteEntries = true;
Expand Down