Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
08f2a8b
ref: Add public APIs.
carlossanlop Sep 22, 2021
973fc8c
src: Expose the archive and entry comments.
carlossanlop Sep 22, 2021
be54e4a
tests: Add update tests for archives and for entries. They cover crea…
carlossanlop Sep 22, 2021
28467ae
Fix encoding detection feedback
carlossanlop Sep 28, 2021
297b9e5
Fix encoding detection feedback
carlossanlop Sep 28, 2021
338b184
Address suggestions
carlossanlop Oct 6, 2021
31a2227
Switch names of archive comment fields.
carlossanlop Oct 18, 2021
78d7e37
Address unicode bit flag sharing problem.
carlossanlop Oct 19, 2021
0a29149
Add more test cases
carlossanlop Oct 19, 2021
b93305b
Adjust tests
carlossanlop Oct 19, 2021
7496628
Add newline so comment only applies to one line
carlossanlop Oct 19, 2021
a949883
Ensure string byte truncation is aligned to encoding's char size.
carlossanlop Nov 27, 2021
9738c30
Remove empty check for non-nullable string. Also remove unnecessary D…
carlossanlop Nov 29, 2021
5179ea2
Defer calculation of truncated encoding string to getter and to writ…
carlossanlop Dec 4, 2021
0e47de4
Rename test arguments
carlossanlop Dec 6, 2021
18bbcd4
Only use bytes[]
carlossanlop Dec 7, 2021
31602b3
Remove unnecessary bit comment
carlossanlop Dec 7, 2021
0ae8a64
Remove unnecessary length check
carlossanlop Dec 7, 2021
e9d5b67
Address feedback
carlossanlop Dec 8, 2021
77b6ee5
Suggestion by adamsitnik: write only if length > 0
carlossanlop Dec 7, 2021
8daedcf
Simplify EntryName code
carlossanlop Dec 8, 2021
1596bb3
Move entryName code to original location
carlossanlop Dec 8, 2021
3444631
In UTF8, use Runes to detect code point length to prevent truncating …
carlossanlop Jan 19, 2022
0749c84
Address suggestions
carlossanlop Feb 3, 2022
b08f0ac
Move ZipTestHelper back to its original position because S.IO.Compres…
carlossanlop Feb 3, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address suggestions
  • Loading branch information
carlossanlop committed Feb 3, 2022
commit 0749c843bc62d751909f11cff1d320d02744ba75
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text;
Expand Down Expand Up @@ -236,7 +237,6 @@ internal static byte[] GetEncodedTruncatedBytesFromString(string? text, Encoding
return bytes[0..totalCodePoints];
}


bytes = encoding.GetBytes(text);
return maxBytes < bytes.Length ? bytes[0..maxBytes] : bytes;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
<Compile Include="ZipArchive\zip_ReadTests.cs" />
<Compile Include="ZipArchive\zip_UpdateTests.cs" />
<Compile Include="ZipArchive\zip_UpdateTests.Comments.cs" />
<Compile Include="ZipTestHelper.cs" />
<Compile Include="$(CommonTestPath)System\IO\PathFeatures.cs" Link="Common\System\IO\PathFeatures.cs" />
<Compile Include="$(CommonTestPath)System\IO\Compression\CRC.cs" Link="Common\System\IO\Compression\CRC.cs" />
<Compile Include="$(CommonTestPath)System\IO\Compression\CompressionStreamTestBase.cs" Link="Common\System\IO\Compression\CompressionStreamTestBase.cs" />
Expand All @@ -35,7 +36,6 @@
<Compile Include="$(CommonTestPath)System\IO\Compression\LocalMemoryStream.cs" Link="Common\System\IO\Compression\LocalMemoryStream.cs" />
<Compile Include="$(CommonTestPath)System\IO\Compression\StreamHelpers.cs" Link="Common\System\IO\Compression\StreamHelpers.cs" />
<Compile Include="$(CommonTestPath)System\IO\TempFile.cs" Link="Common\System\IO\TempFile.cs" />
<Compile Include="$(CommonTestPath)System\IO\Compression\ZipTestHelper.cs" Link="Common\System\IO\Compression\ZipTestHelper.cs" />
<Compile Include="$(CommonPath)System\Threading\Tasks\TaskToApm.cs" Link="Common\System\Threading\Tasks\TaskToApm.cs" />
<Compile Include="$(CommonTestPath)System\IO\ConnectedStreams.cs" Link="Common\System\IO\ConnectedStreams.cs" />
<Compile Include="$(CommonPath)System\Net\MultiArrayBuffer.cs" Link="ProductionCode\Common\System\Net\MultiArrayBuffer.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,33 @@ public partial class zip_CreateTests : ZipFileTestBase
[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Create_Comment_AsciiEntryName_NullEncoding(string originalComment, string expectedComment) =>
Create_Comment_EntryName_Encoding_Internal("file.txt", originalComment, expectedComment, null);
Create_Comment_EntryName_Encoding_Internal(AsciiFileName, originalComment, expectedComment, null);

[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Create_Comment_AsciiEntryName_Utf8Encoding(string originalComment, string expectedComment) =>
Create_Comment_EntryName_Encoding_Internal("file.txt", originalComment, expectedComment, Encoding.UTF8);
Create_Comment_EntryName_Encoding_Internal(AsciiFileName, originalComment, expectedComment, Encoding.UTF8);

[Theory]
[MemberData(nameof(Latin1Comment_Data))]
public static void Create_Comment_AsciiEntryName_Latin1Encoding(string originalComment, string expectedComment) =>
Create_Comment_EntryName_Encoding_Internal("file.txt", originalComment, expectedComment, Encoding.Latin1);
Create_Comment_EntryName_Encoding_Internal(AsciiFileName, originalComment, expectedComment, Encoding.Latin1);

[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Create_Comment_Utf8EntryName_NullEncoding(string originalComment, string expectedComment) =>
Create_Comment_EntryName_Encoding_Internal($"{SmileyEmoji}.txt", originalComment, expectedComment, null);
Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, null);

[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Create_Comment_Utf8EntryName_Utf8Encoding(string originalComment, string expectedComment) =>
Create_Comment_EntryName_Encoding_Internal($"{SmileyEmoji}.txt", originalComment, expectedComment, Encoding.UTF8);
Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, Encoding.UTF8);

[Theory]
[MemberData(nameof(Latin1Comment_Data))]
public static void Create_Comment_Utf8EntryName_Latin1Encoding(string originalComment, string expectedComment) =>
// Emoji not supported by latin1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you pass an emoji with latin1 encoding? I assume it just encodes it as something meaningless. We should assert that the latin1-encoded-emoji still gets truncated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how I would assert that in the tests. I just wanted to make sure the string got truncated, even if the final outputted string, when viewed by the user, will be garbage where the emoji was saved.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how I would assert that in the tests.

On Latin1Comment_Data add something like this:

string latin1ExpectedALettersAndTruncatedEmoji = new string('a', ushort.MaxValue - 1) + SmileyEmoji[0];
string latin1OriginalALettersAndEmoji = new string('a', ushort.MaxValue - 1) + SmileyEmoji;

My point being that the truncation logic only runs when it makes sense (UTF8).

Create_Comment_EntryName_Encoding_Internal($"{LowerCaseOUmlautChar}.txt", originalComment, expectedComment, Encoding.Latin1);
Create_Comment_EntryName_Encoding_Internal(Utf8AndLatin1FileName, originalComment, expectedComment, Encoding.Latin1);

private static void Create_Comment_EntryName_Encoding_Internal(string entryName, string originalComment, string expectedComment, Encoding encoding)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,45 @@ public partial class zip_UpdateTests : ZipFileTestBase
[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Update_Comment_AsciiEntryName_NullEncoding(string originalComment, string expectedComment) =>
Update_Comment_EntryName_Encoding_Internal("file.txt",
Update_Comment_EntryName_Encoding_Internal(AsciiFileName,
originalComment, expectedComment, null,
new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}", new string('a', ushort.MaxValue - 1));
ALettersUShortMaxValueMinusOneAndCopyRightChar, ALettersUShortMaxValueMinusOne);

[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Update_Comment_AsciiEntryName_Utf8Encoding(string originalComment, string expectedComment) =>
Update_Comment_EntryName_Encoding_Internal("file.txt",
Update_Comment_EntryName_Encoding_Internal(AsciiFileName,
originalComment, expectedComment, Encoding.UTF8,
new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}", new string('a', ushort.MaxValue - 1));
ALettersUShortMaxValueMinusOneAndCopyRightChar, ALettersUShortMaxValueMinusOne);

[Theory]
[MemberData(nameof(Latin1Comment_Data))]
public static void Update_Comment_AsciiEntryName_Latin1Encoding(string originalComment, string expectedComment) =>
Update_Comment_EntryName_Encoding_Internal("file.txt",
Update_Comment_EntryName_Encoding_Internal(AsciiFileName,
originalComment, expectedComment, Encoding.Latin1,
new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}{CopyrightChar}", new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}");
ALettersUShortMaxValueMinusOneAndTwoCopyRightChars, ALettersUShortMaxValueMinusOneAndCopyRightChar);

[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Update_Comment_Utf8EntryName_NullEncoding(string originalComment, string expectedComment) =>
Update_Comment_EntryName_Encoding_Internal($"{SmileyEmoji}.txt",
Update_Comment_EntryName_Encoding_Internal(Utf8FileName,
originalComment, expectedComment, null,
new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}", new string('a', ushort.MaxValue - 1));
ALettersUShortMaxValueMinusOneAndCopyRightChar, ALettersUShortMaxValueMinusOne);

[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Update_Comment_Utf8EntryName_Utf8Encoding(string originalComment, string expectedComment) =>
Update_Comment_EntryName_Encoding_Internal($"{SmileyEmoji}.txt",
Update_Comment_EntryName_Encoding_Internal(Utf8FileName,
originalComment, expectedComment, Encoding.UTF8,
new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}", new string('a', ushort.MaxValue - 1));
ALettersUShortMaxValueMinusOneAndCopyRightChar, ALettersUShortMaxValueMinusOne);

[Theory]
[MemberData(nameof(Latin1Comment_Data))]
public static void Update_Comment_Utf8EntryName_Latin1Encoding(string originalComment, string expectedComment) =>
// Emoji not supported by latin1
Update_Comment_EntryName_Encoding_Internal($"{LowerCaseOUmlautChar}.txt",
// Emoji is not supported/detected in latin1
Update_Comment_EntryName_Encoding_Internal(Utf8AndLatin1FileName,
originalComment, expectedComment, Encoding.Latin1,
new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}{CopyrightChar}", new string('a', ushort.MaxValue - 1) + $"{CopyrightChar}");

ALettersUShortMaxValueMinusOneAndTwoCopyRightChars, ALettersUShortMaxValueMinusOneAndCopyRightChar);

private static void Update_Comment_EntryName_Encoding_Internal(string entryName,
string originalCreateComment, string expectedCreateComment, Encoding encoding,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,9 +384,18 @@ internal static void AddEntry(ZipArchive archive, string name, string contents,
}
}

protected const string SmileyEmoji = "\ud83d\ude04";
protected const string LowerCaseOUmlautChar = "\u00F6";
protected const string CopyrightChar = "\u00A9";
protected const string Utf8SmileyEmoji = "\ud83d\ude04";
protected const string Utf8LowerCaseOUmlautChar = "\u00F6";
protected const string Utf8CopyrightChar = "\u00A9";
protected const string AsciiFileName = "file.txt";
// The o with umlaut is a character that exists in both latin1 and utf8
protected const string Utf8AndLatin1FileName = $"{Utf8LowerCaseOUmlautChar}.txt";
// emojis only make sense in utf8
protected const string Utf8FileName = $"{Utf8SmileyEmoji}.txt";
protected static readonly string ALettersUShortMaxValueMinusOne = new string('a', ushort.MaxValue - 1);
protected static readonly string ALettersUShortMaxValue = ALettersUShortMaxValueMinusOne + 'a';
protected static readonly string ALettersUShortMaxValueMinusOneAndCopyRightChar = ALettersUShortMaxValueMinusOne + Utf8CopyrightChar;
protected static readonly string ALettersUShortMaxValueMinusOneAndTwoCopyRightChars = ALettersUShortMaxValueMinusOneAndCopyRightChar + Utf8CopyrightChar;

// Returns pairs that are returned the same way by Utf8 and Latin1
// Returns: originalComment, expectedComment
Expand All @@ -395,28 +404,31 @@ private static IEnumerable<object[]> SharedComment_Data()
yield return new object[] { null, string.Empty };
yield return new object[] { string.Empty, string.Empty };
yield return new object[] { "a", "a" };
yield return new object[] { LowerCaseOUmlautChar, LowerCaseOUmlautChar };
yield return new object[] { Utf8LowerCaseOUmlautChar, Utf8LowerCaseOUmlautChar };
}

// Returns pairs as expected by Utf8
// Returns: originalComment, expectedComment
public static IEnumerable<object[]> Utf8Comment_Data()
{
string asciiExpectedExactMaxLength = new('a', ushort.MaxValue);
string asciiOriginalOverMaxLength = asciiExpectedExactMaxLength + "aaa";
string asciiOriginalOverMaxLength = ALettersUShortMaxValue + "aaa";

// A smiley emoji code point consists of two characters,
// meaning the whole emoji should be fully truncated
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should add a case where the emoji does not get truncated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only case where the string is truncated is when it's length is greater than expected. The contents don't matter. So if a string is smaller than the max length, it should remain untouched, regardless of its contents.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

string utf8ExpectedJustALetters = new('a', ushort.MaxValue - 1);
string utf8OriginalALettersAndOneEmoji = utf8ExpectedJustALetters + SmileyEmoji;
string utf8OriginalALettersAndOneEmojiDoesNotFit = ALettersUShortMaxValueMinusOne + Utf8SmileyEmoji;

// A smiley emoji code point consists of two characters,
// so it should not be truncated if it's the last character and the total length is not over the limit.
string utf8OriginalALettersAndOneEmojiFits = "aaaaa" + Utf8SmileyEmoji;

yield return new object[] { asciiOriginalOverMaxLength, ALettersUShortMaxValue };
yield return new object[] { utf8OriginalALettersAndOneEmojiDoesNotFit, ALettersUShortMaxValueMinusOne };
yield return new object[] { utf8OriginalALettersAndOneEmojiFits, utf8OriginalALettersAndOneEmojiFits };

foreach (object[] e in SharedComment_Data())
{
yield return e;
}

yield return new object[] { asciiOriginalOverMaxLength, asciiExpectedExactMaxLength };
yield return new object[] { utf8OriginalALettersAndOneEmoji, utf8ExpectedJustALetters };
}

// Returns pairs as expected by Latin1
Expand All @@ -425,15 +437,15 @@ public static IEnumerable<object[]> Latin1Comment_Data()
{
// In Latin1, all characters are exactly 1 byte

string latin1ExpectedALettersAndOneOUmlaut = new string('a', ushort.MaxValue - 1) + LowerCaseOUmlautChar;
string latin1OriginalALettersAndTwoOUmlauts = latin1ExpectedALettersAndOneOUmlaut + LowerCaseOUmlautChar;
string latin1ExpectedALettersAndOneOUmlaut = ALettersUShortMaxValueMinusOne + Utf8LowerCaseOUmlautChar;
string latin1OriginalALettersAndTwoOUmlauts = latin1ExpectedALettersAndOneOUmlaut + Utf8LowerCaseOUmlautChar;

yield return new object[] { latin1OriginalALettersAndTwoOUmlauts, latin1ExpectedALettersAndOneOUmlaut };

foreach (object[] e in SharedComment_Data())
{
yield return e;
}

yield return new object[] { latin1OriginalALettersAndTwoOUmlauts, latin1ExpectedALettersAndOneOUmlaut };
}
}
}