Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
08f2a8b
ref: Add public APIs.
carlossanlop Sep 22, 2021
973fc8c
src: Expose the archive and entry comments.
carlossanlop Sep 22, 2021
be54e4a
tests: Add update tests for archives and for entries. They cover crea…
carlossanlop Sep 22, 2021
28467ae
Fix encoding detection feedback
carlossanlop Sep 28, 2021
297b9e5
Fix encoding detection feedback
carlossanlop Sep 28, 2021
338b184
Address suggestions
carlossanlop Oct 6, 2021
31a2227
Switch names of archive comment fields.
carlossanlop Oct 18, 2021
78d7e37
Address unicode bit flag sharing problem.
carlossanlop Oct 19, 2021
0a29149
Add more test cases
carlossanlop Oct 19, 2021
b93305b
Adjust tests
carlossanlop Oct 19, 2021
7496628
Add newline so comment only applies to one line
carlossanlop Oct 19, 2021
a949883
Ensure string byte truncation is aligned to encoding's char size.
carlossanlop Nov 27, 2021
9738c30
Remove empty check for non-nullable string. Also remove unnecessary D…
carlossanlop Nov 29, 2021
5179ea2
Defer calculation of truncated encoding string to getter and to writ…
carlossanlop Dec 4, 2021
0e47de4
Rename test arguments
carlossanlop Dec 6, 2021
18bbcd4
Only use bytes[]
carlossanlop Dec 7, 2021
31602b3
Remove unnecessary bit comment
carlossanlop Dec 7, 2021
0ae8a64
Remove unnecessary length check
carlossanlop Dec 7, 2021
e9d5b67
Address feedback
carlossanlop Dec 8, 2021
77b6ee5
Suggestion by adamsitnik: write only if length > 0
carlossanlop Dec 7, 2021
8daedcf
Simplify EntryName code
carlossanlop Dec 8, 2021
1596bb3
Move entryName code to original location
carlossanlop Dec 8, 2021
3444631
In UTF8, use Runes to detect code point length to prevent truncating …
carlossanlop Jan 19, 2022
0749c84
Address suggestions
carlossanlop Feb 3, 2022
b08f0ac
Move ZipTestHelper back to its original position because S.IO.Compres…
carlossanlop Feb 3, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Defer calculation of truncated encoding string to getter and to writ…
…ing action when disposing.
  • Loading branch information
carlossanlop committed Jan 18, 2022
commit 5179ea2896bf797490500fb117090f8deba72d00
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ public class ZipArchive : IDisposable
private uint _numberOfThisDisk; //only valid after ReadCentralDirectory
private long _expectedNumberOfEntries;
private Stream? _backingStream;
private byte[]? _archiveComment;
private string? _archiveCommentAsString;
private string? _archiveComment;
private Encoding? _entryNameAndCommentEncoding;

#if DEBUG_FORCE_ZIP64
Expand Down Expand Up @@ -176,7 +175,6 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
_isDisposed = false;
_numberOfThisDisk = 0; // invalid until ReadCentralDirectory
_archiveComment = null;
_archiveCommentAsString = null;

switch (mode)
{
Expand Down Expand Up @@ -216,14 +214,16 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?

/// <summary>
/// Gets or sets the optional archive comment.
/// The comment encoding is determined by the <c>entryNameEncoding</c> parameter of the <see cref="ZipArchive(Stream,ZipArchiveMode,bool,Encoding?)"/> constructor.
/// </summary>
/// <remarks>
/// The comment encoding is determined by the <c>entryNameEncoding</c> parameter of the <see cref="ZipArchive(Stream,ZipArchiveMode,bool,Encoding?)"/> constructor.
/// If the comment byte length is larger than <see cref="ushort.MaxValue"/>, it will be truncated when disposing the archive.
/// </remarks>
[AllowNull]
public string Comment
{
get => _archiveCommentAsString ?? string.Empty;
set => (_archiveCommentAsString, _archiveComment) =
ZipHelper.EncodeAndTruncateStringToBytes(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _);
get => ZipHelper.GetTruncatedComment(_archiveComment, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength);
set => _archiveComment = value ?? string.Empty;
}

/// <summary>
Expand Down Expand Up @@ -565,16 +565,12 @@ private void ReadEndOfCentralDirectory()

if (eocd.ArchiveComment.Length > 0)
{
_archiveComment = eocd.ArchiveComment;
_archiveCommentAsString = ZipHelper.TruncateEncodedBytesAndReturnString(
ref _archiveComment,
EntryNameAndCommentEncoding ?? Encoding.UTF8,
ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength);
Encoding encoding = EntryNameAndCommentEncoding ?? Encoding.UTF8;
_archiveComment = encoding.GetString(eocd.ArchiveComment);
}
else
{
_archiveComment = null;
_archiveCommentAsString = null;
}

TryReadZip64EndOfCentralDirectory(eocd, eocdStart);
Expand Down Expand Up @@ -711,7 +707,9 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr
}

// write normal eocd
ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment);
byte[]? commentBytes = string.IsNullOrEmpty(_archiveComment) ? null : ZipHelper.GetEncodedTruncatedBytes(_archiveComment, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _);

ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, commentBytes);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,9 @@ public partial class ZipArchiveEntry
// only apply to update mode
private List<ZipGenericExtraField>? _cdUnknownExtraFields;
private List<ZipGenericExtraField>? _lhUnknownExtraFields;
private string? _fileCommentAsString;
private byte[]? _fileComment;
private string? _fileComment;
private readonly CompressionLevel? _compressionLevel;
private bool _hasUnicodeEntryName;
private bool _hasUnicodeComment;
private bool _hasUnicodeEntryNameOrComment;

// Initializes a ZipArchiveEntry instance for an existing archive entry.
internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)
Expand Down Expand Up @@ -83,21 +81,17 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)

if (cd.FileComment.Length > 0)
{
_fileComment = cd.FileComment;
_fileCommentAsString = ZipHelper.TruncateEncodedBytesAndReturnString(
ref _fileComment,
_archive.EntryNameAndCommentEncoding ?? Encoding.UTF8,
ushort.MaxValue);
Encoding encoding = _archive.EntryNameAndCommentEncoding ?? Encoding.UTF8;
_fileComment = encoding.GetString(cd.FileComment);
}
else
{
_fileCommentAsString = null;
_fileComment = null;
}

_compressionLevel = null;

_hasUnicodeEntryName = _hasUnicodeComment = IsGeneralPurposeUnicodeFlagSet();
_hasUnicodeEntryNameOrComment = IsGeneralPurposeUnicodeFlagSet();
}

// Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level.
Expand Down Expand Up @@ -144,7 +138,6 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName)
_cdUnknownExtraFields = null;
_lhUnknownExtraFields = null;

_fileCommentAsString = null;
_fileComment = null;

_compressionLevel = null;
Expand All @@ -158,8 +151,7 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName)
_archive.AcquireArchiveStream(this);
}

_hasUnicodeEntryName = false;
_hasUnicodeComment = false;
_hasUnicodeEntryNameOrComment = false;
}

/// <summary>
Expand Down Expand Up @@ -199,14 +191,16 @@ public int ExternalAttributes

/// <summary>
/// Gets or sets the optional entry comment.
/// The comment encoding is determined by the <c>entryNameEncoding</c> parameter of the <see cref="ZipArchive(Stream,ZipArchiveMode,bool,Encoding?)"/> constructor.
/// </summary>
/// <remarks>
/// The comment encoding is determined by the <c>entryNameEncoding</c> parameter of the <see cref="ZipArchive(Stream,ZipArchiveMode,bool,Encoding?)"/> constructor.
/// If the comment byte length is larger than <see cref="ushort.MaxValue"/>, it will be truncated when disposing the archive.
/// </remarks>
[AllowNull]
public string Comment
{
get => _fileCommentAsString ?? string.Empty;
set => (_fileCommentAsString, _fileComment) =
ZipHelper.EncodeAndTruncateStringToBytes(value, _archive?.EntryNameAndCommentEncoding, ushort.MaxValue, out _hasUnicodeComment);
get => ZipHelper.GetTruncatedComment(_fileComment, _archive.EntryNameAndCommentEncoding, ushort.MaxValue);
set => _fileComment = value ?? string.Empty;
}

/// <summary>
Expand All @@ -226,7 +220,10 @@ private set
if (value == null)
throw new ArgumentNullException(nameof(FullName));

_storedEntryNameBytes = ZipHelper.EncodeStringToBytes(value, _archive?.EntryNameAndCommentEncoding, out _hasUnicodeEntryName);
_storedEntryNameBytes = ZipHelper.GetEncodedTruncatedBytes(
value, _archive.EntryNameAndCommentEncoding, 0 /* No truncation */, out bool hasUnicodeEntryName);

_hasUnicodeEntryNameOrComment |= hasUnicodeEntryName;
_storedEntryName = value;

if (ParseFileName(value, _versionMadeByPlatform) == "")
Expand Down Expand Up @@ -432,12 +429,12 @@ private string DecodeBytesToString(byte[] bytes)

Encoding encoding;
// If the bit flag is not set, it either means we are adding a new entry, or
// this is an existing entry with non-unicode (ASCII) comment and fullname.
// this is an existing entry with ASCII comment and fullname.
// If the user chose a default encoding in the ZipArchive.ctor, then we use it, otherwise
// we default to UTF8.
if (!IsGeneralPurposeUnicodeFlagSet())
{
encoding = _archive?.EntryNameAndCommentEncoding ?? Encoding.UTF8;
encoding = _archive.EntryNameAndCommentEncoding ?? Encoding.UTF8;
}
// If bit flag is set, it either means we opened an existing archive entry with the unicode flag set, or
// we had already set the flag for this entry in a previous comment or fullname change.
Expand Down Expand Up @@ -536,7 +533,7 @@ internal void WriteCentralDirectoryFileHeader()
extraFieldLength = (ushort)bigExtraFieldLength;
}

if (_hasUnicodeEntryName || _hasUnicodeComment)
if (_hasUnicodeEntryNameOrComment)
_generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment;
else
_generalPurposeBitFlag &= ~BitFlagValues.UnicodeFileNameAndComment;
Expand All @@ -554,10 +551,13 @@ internal void WriteCentralDirectoryFileHeader()
writer.Write((ushort)_storedEntryNameBytes.Length); // File Name Length (2 bytes)
writer.Write(extraFieldLength); // Extra Field Length (2 bytes)

byte[]? commentBytes = string.IsNullOrEmpty(_fileComment) ? null :
ZipHelper.GetEncodedTruncatedBytes(_fileComment, _archive.EntryNameAndCommentEncoding, ushort.MaxValue, out _);

// This should hold because of how we read it originally in ZipCentralDirectoryFileHeader:
Debug.Assert((_fileComment == null) || (_fileComment.Length <= ushort.MaxValue));
Debug.Assert((commentBytes == null) || (commentBytes.Length <= ushort.MaxValue));

writer.Write(_fileComment != null ? (ushort)_fileComment.Length : (ushort)0); // file comment length
writer.Write(commentBytes != null ? (ushort)commentBytes.Length : (ushort)0); // file comment length
writer.Write((ushort)0); // disk number start
writer.Write((ushort)0); // internal file attributes
writer.Write(_externalFileAttr); // external file attributes
Expand All @@ -571,8 +571,8 @@ internal void WriteCentralDirectoryFileHeader()
if (_cdUnknownExtraFields != null)
ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream);

if (_fileComment != null)
writer.Write(_fileComment);
if (commentBytes != null)
writer.Write(commentBytes);
}

// returns false if fails, will get called on every entry before closing in update mode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text;

namespace System.IO.Compression
Expand Down Expand Up @@ -194,56 +195,37 @@ private static bool SeekBackwardsAndRead(Stream stream, byte[] buffer, out int b
}
}

// Converts the specified string into bytes using the optional specified encoding. If null, then the encoding is calculated from the string itself.
internal static byte[] EncodeStringToBytes(string? text, Encoding? encoding, out bool isUTF8)
internal static string GetTruncatedComment(string? comment, Encoding? archiveEncoding, int maxValue)
{
if (text == null)
{
text = string.Empty;
}
if (encoding == null)
{
encoding = GetEncoding(text);
}
isUTF8 = encoding.CodePage == 65001;
return encoding.GetBytes(text);
byte[] bytes = GetEncodedTruncatedBytes(comment ?? string.Empty, archiveEncoding, maxValue, out bool isUTF8);
Encoding encoding = archiveEncoding ?? (isUTF8 ? Encoding.UTF8 : Encoding.ASCII);
return encoding.GetString(bytes);
}

internal static (string, byte[]) EncodeAndTruncateStringToBytes(string? text, Encoding? encoding, int maxBytes, out bool isUTF8)
// Converts the specified string into bytes using the optional specified encoding.
// If the encoding is null, then the encoding is calculated from the string itself.
// If maxBytes is greater than zero and the encoded bytes exceed it, the returned byte array
// is truncated to the largest multiple of encoding.GetMaxByteCount(1) that fits in maxBytes.
internal static byte[] GetEncodedTruncatedBytes(string? text, Encoding? encoding, int maxBytes, out bool isUTF8)
{
if (text == null)
{
text = string.Empty;
}
if (encoding == null)
{
encoding = GetEncoding(text);
}
byte[] bytes = EncodeStringToBytes(text, encoding, out isUTF8);
string truncatedString = TruncateEncodedBytesAndReturnString(ref bytes, encoding, maxBytes);
return (truncatedString, bytes);
}
text ??= string.Empty;
encoding ??= GetEncoding(text);

internal static string TruncateEncodedBytesAndReturnString(ref byte[] bytes, Encoding encoding, int maxBytes)
{
TruncateCommentIfNeeded(ref bytes, encoding, maxBytes);
return encoding.GetString(bytes);
}
isUTF8 = encoding.CodePage == 65001;

// Truncate the string if larger that max total bytes allowed.
// Ensure we cut whole chars, which depend on encoding.
// Returns true if it had to be truncated.
internal static void TruncateCommentIfNeeded(ref byte[] bytes, Encoding encoding, int maxBytes)
{
if (bytes.Length > maxBytes)
byte[] bytes = encoding.GetBytes(text);

if (maxBytes > 0 && bytes.Length > maxBytes)
{
int bytesPerChar = encoding.GetMaxByteCount(1);

int encodedCharsThatFit = maxBytes / bytesPerChar;
int totalBytesToTruncate = encodedCharsThatFit * bytesPerChar;

bytes = bytes[0..totalBytesToTruncate];
return bytes[0..totalBytesToTruncate];
}

return bytes;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
<Compile Include="Utilities\WrappedStream.cs" />
<Compile Include="XunitAssemblyAttributes.cs" />
<Compile Include="ZipArchive\zip_CreateTests.cs" />
<Compile Include="ZipArchive\zip_CreateTests.Comments.cs" />
<Compile Include="ZipArchive\zip_InvalidParametersAndStrangeFiles.cs" />
<Compile Include="ZipArchive\zip_ManualAndCompatibilityTests.cs" />
<Compile Include="ZipArchive\zip_netcoreappTests.cs" />
<Compile Include="ZipArchive\zip_ReadTests.cs" />
<Compile Include="ZipArchive\zip_UpdateTests.cs" />
<Compile Include="ZipArchive\zip_UpdateTests.Comments.cs" />
<Compile Include="$(CommonTestPath)System\IO\PathFeatures.cs" Link="Common\System\IO\PathFeatures.cs" />
<Compile Include="$(CommonTestPath)System\IO\Compression\CRC.cs" Link="Common\System\IO\Compression\CRC.cs" />
<Compile Include="$(CommonTestPath)System\IO\Compression\CompressionStreamTestBase.cs" Link="Common\System\IO\Compression\CompressionStreamTestBase.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using Xunit;
using System.Text;

namespace System.IO.Compression.Tests
{
    public partial class zip_CreateTests : ZipFileTestBase
    {
        /// <summary>
        /// Creates an archive containing a single entry whose name and comment come from
        /// two (possibly different) encodings, then re-opens the archive and verifies that
        /// both strings round-trip unchanged.
        /// </summary>
        /// <param name="en1">Name of the encoding used to prepare the entry name.</param>
        /// <param name="s1">Text used as the entry name.</param>
        /// <param name="en2">Name of the encoding used to prepare the entry comment.</param>
        /// <param name="s2">Text used as the entry comment.</param>
        [Theory]
        // General purpose bit flag must get the appropriate bit set if a file comment or an entry name is unicode
        [InlineData("ascii", "ascii!!!", "utf-8", "UÄäÖöÕõÜü")]
        [InlineData("utf-8", "UÄäÖöÕõÜü", "ascii", "ascii!!!")]
        [InlineData("ascii", "ascii!!!", "latin1", "LÄäÖöÕõÜü")]
        [InlineData("latin1", "LÄäÖöÕõÜü", "ascii", "ascii!!!")]
        [InlineData("utf-8", "UÄäÖöÕõÜü", "latin1", "LÄäÖöÕõÜü")]
        [InlineData("latin1", "LÄäÖöÕõÜü", "utf-8", "UÄäÖöÕõÜü")]
        public static void Create_ZipArchiveEntry_DifferentEncodings_FullName_And_Comment(string en1, string s1, string en2, string s2)
        {
            // Pass each input through its named encoding (encode, then decode) so the
            // expected values are exactly what that encoding produces for the text.
            Encoding e1 = Encoding.GetEncoding(en1);
            Encoding e2 = Encoding.GetEncoding(en2);
            string entryName = e1.GetString(e1.GetBytes(s1));
            string comment = e2.GetString(e2.GetBytes(s2));

            var stream = new MemoryStream();
            var testStream = new WrappedStream(stream, true, true, true, null);

            // Create with no encoding to autoselect it if one of the two strings is unicode
            using (var zip = new ZipArchive(testStream, ZipArchiveMode.Create, leaveOpen: true))
            {
                ZipArchiveEntry entry = zip.CreateEntry(entryName, CompressionLevel.NoCompression);
                entry.Comment = comment;

                // The values must be observable on the entry before the archive is written.
                Assert.Equal(entryName, entry.FullName);
                Assert.Equal(comment, entry.Comment);
            }

            // Open with no encoding
            using (var zip = new ZipArchive(testStream, ZipArchiveMode.Read, leaveOpen: false))
            {
                // Exactly one entry was created above. Asserting that here prevents the
                // verification from passing vacuously if the archive comes back empty.
                ZipArchiveEntry entry = Assert.Single(zip.Entries);

                Assert.Equal(entryName, entry.FullName);
                Assert.Equal(comment, entry.Comment);
            }
        }
    }
}
Loading