Skip to content

Commit ca6bbf1

Browse files
authored
Improve performance of Tar library (#74281)
* Avoid unnecessary byte[] allocations * Remove unnecessary use of FileStreamOptions * Clean up Dispose{Async} implementations * Clean up unnecessary consts Not a perf thing, just readability. * Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary async variants, and overhaul GenerateExtendedAttributesDataStream * Avoid string allocations in ReadMagicAttribute * Avoid allocation in WriteAsOctal * Improve handling of octal * Avoid allocation for version string * Removing boxing and char string allocation in GenerateExtendedAttributeName * Fix a couple unnecessary dictionary lookups * Replace Enum.HasFlag usage * Remove allocations from Write{Posix}Name * Replace ArrayPool use with string.Create * Replace more superfluous ArrayPool usage * Remove ArrayPool use from System.IO.Compression.ZipFile * Fix inverted condition * Use generic math to parse octal * Remove allocations from StringReader and string.Split * Remove magic string allocation for Ustar when not V7 * Remove file name and directory name allocation in GenerateExtendedAttributeName
1 parent 729844a commit ca6bbf1

File tree

12 files changed

+511
-576
lines changed

12 files changed

+511
-576
lines changed

src/libraries/Common/src/System/IO/Archiving.Utils.cs

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@ internal static partial class ArchivingUtils
1515
private const char PathSeparatorChar = '/';
1616
private const string PathSeparatorString = "/";
1717

18-
public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false)
18+
public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false)
1919
{
2020
Debug.Assert(length <= entry.Length - offset);
21-
Debug.Assert(buffer != null);
2221

2322
// Remove any leading slashes from the entry name:
2423
while (length > 0)
@@ -32,26 +31,36 @@ public static string EntryFromPath(string entry, int offset, int length, ref cha
3231
}
3332

3433
if (length == 0)
34+
{
3535
return appendPathSeparator ? PathSeparatorString : string.Empty;
36+
}
3637

37-
int resultLength = appendPathSeparator ? length + 1 : length;
38-
EnsureCapacity(ref buffer, resultLength);
39-
entry.CopyTo(offset, buffer, 0, length);
40-
41-
// '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux)
42-
// We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is
43-
// explicitly trying to standardize to '/'
44-
for (int i = 0; i < length; i++)
38+
if (appendPathSeparator)
4539
{
46-
char ch = buffer[i];
47-
if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar)
48-
buffer[i] = PathSeparatorChar;
40+
length++;
4941
}
5042

51-
if (appendPathSeparator)
52-
buffer[length] = PathSeparatorChar;
43+
return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) =>
44+
{
45+
state.entry.AsSpan(state.offset).CopyTo(dest);
46+
47+
// '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux)
48+
// We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is
49+
// explicitly trying to standardize to '/'
50+
for (int i = 0; i < dest.Length; i++)
51+
{
52+
char ch = dest[i];
53+
if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar)
54+
{
55+
dest[i] = PathSeparatorChar;
56+
}
57+
}
5358

54-
return new string(buffer, 0, resultLength);
59+
if (state.appendPathSeparator)
60+
{
61+
dest[^1] = PathSeparatorChar;
62+
}
63+
});
5564
}
5665

5766
public static void EnsureCapacity(ref char[] buffer, int min)

src/libraries/System.Formats.Tar/src/Resources/Strings.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,4 +255,7 @@
255255
<data name="IO_SeekBeforeBegin" xml:space="preserve">
256256
<value>An attempt was made to move the position before the beginning of the stream.</value>
257257
</data>
258+
<data name="TarInvalidNumber" xml:space="preserve">
259+
<value>Unable to parse number.</value>
260+
</data>
258261
</root>

src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) =>
188188
// the substream is just 'a chunk' of the super-stream
189189
protected override void Dispose(bool disposing)
190190
{
191-
if (disposing && !_isDisposed)
192-
{
193-
_isDisposed = true;
194-
}
191+
_isDisposed = true;
195192
base.Dispose(disposing);
196193
}
197194
}

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,6 @@ namespace System.Formats.Tar
1515
/// </summary>
1616
public static class TarFile
1717
{
18-
// Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely
19-
// to be greater than the length of typical entry names from the file system, even
20-
// on non-Windows platforms. The capacity will be increased, if needed.
21-
private const int DefaultCapacity = 260;
22-
2318
/// <summary>
2419
/// Creates a tar stream that contains all the filesystem entries from the specified directory.
2520
/// </summary>
@@ -283,23 +278,14 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre
283278
DirectoryInfo di = new(sourceDirectoryName);
284279
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);
285280

286-
char[] entryNameBuffer = ArrayPool<char>.Shared.Rent(DefaultCapacity);
287-
288-
try
281+
if (includeBaseDirectory)
289282
{
290-
if (includeBaseDirectory)
291-
{
292-
writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer));
293-
}
294-
295-
foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
296-
{
297-
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer));
298-
}
283+
writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name));
299284
}
300-
finally
285+
286+
foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
301287
{
302-
ArrayPool<char>.Shared.Return(entryNameBuffer);
288+
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length));
303289
}
304290
}
305291
}
@@ -339,23 +325,14 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector
339325
DirectoryInfo di = new(sourceDirectoryName);
340326
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);
341327

342-
char[] entryNameBuffer = ArrayPool<char>.Shared.Rent(DefaultCapacity);
343-
344-
try
328+
if (includeBaseDirectory)
345329
{
346-
if (includeBaseDirectory)
347-
{
348-
await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false);
349-
}
350-
351-
foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
352-
{
353-
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false);
354-
}
330+
await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false);
355331
}
356-
finally
332+
333+
foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
357334
{
358-
ArrayPool<char>.Shared.Return(entryNameBuffer);
335+
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false);
359336
}
360337
}
361338
}
@@ -365,18 +342,18 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i
365342
includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName;
366343

367344
// Constructs the entry name used for a filesystem entry when creating an archive.
368-
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer)
345+
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength)
369346
{
370347
int entryNameLength = file.FullName.Length - basePathLength;
371348
Debug.Assert(entryNameLength > 0);
372349

373-
bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory);
374-
return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory);
350+
bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0;
351+
return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory);
375352
}
376353

377-
private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer)
354+
private static string GetEntryNameForBaseDirectory(string name)
378355
{
379-
return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true);
356+
return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true);
380357
}
381358

382359
// Extracts an archive into the specified directory.

0 commit comments

Comments
 (0)