diff --git a/.azure/pipelines/ci-fuzz.yml b/.azure/pipelines/ci-fuzz.yml index 4081c3d..bf1e780 100644 --- a/.azure/pipelines/ci-fuzz.yml +++ b/.azure/pipelines/ci-fuzz.yml @@ -31,3 +31,17 @@ stages: - template: jobs/fuzzing-libFuzzer.yml parameters: method: Base64_Url_Decode + + - stage: Base64_Whitespace + dependsOn: [] # break sequential dependency + jobs: + - template: jobs/fuzzing-libFuzzer-no-matrix.yml + parameters: + method: Base64_Default_Decode_Whitespace + + - stage: Base64Url_Whitespace + dependsOn: [] # break sequential dependency + jobs: + - template: jobs/fuzzing-libFuzzer-no-matrix.yml + parameters: + method: Base64_Url_Decode_Whitespace diff --git a/.azure/pipelines/jobs/fuzzing-libFuzzer-no-matrix.yml b/.azure/pipelines/jobs/fuzzing-libFuzzer-no-matrix.yml new file mode 100644 index 0000000..2d47038 --- /dev/null +++ b/.azure/pipelines/jobs/fuzzing-libFuzzer-no-matrix.yml @@ -0,0 +1,12 @@ +parameters: + method: '' + +jobs: + - job: ${{ parameters.method }} + pool: + vmImage: 'ubuntu-20.04' + timeoutInMinutes: 0 # 360 minutes + steps: + - template: steps/fuzz.yml + parameters: + method: ${{ parameters.method }} diff --git a/.azure/pipelines/jobs/fuzzing-libFuzzer.yml b/.azure/pipelines/jobs/fuzzing-libFuzzer.yml index 7186669..3e7365b 100644 --- a/.azure/pipelines/jobs/fuzzing-libFuzzer.yml +++ b/.azure/pipelines/jobs/fuzzing-libFuzzer.yml @@ -16,66 +16,6 @@ jobs: SSE_disabled: DOTNET_EnableSSE: 0 steps: - # ~SDKs already installed~ - - template: steps/dotnet-install.yml - # Needed for sharpfuzz - - task: UseDotNet@2 - displayName: 'Use .NET SDK 6.0' - inputs: - version: 6.x - includePreviewVersions: true - installationPath: $(Agent.ToolsDirectory)/dotnet - - - bash: | - cd fuzz - - # -L to follow the redirect - curl -Lo libfuzzer-dotnet https://github.com/gfoidl/libfuzzer-dotnet/releases/download/v1.1.0-preview-1/libfuzzer-dotnet - - chmod +x *.sh - chmod +x libfuzzer-dotnet - displayName: pre-requisites - - - bash: | - # Install 
SharpFuzz.CommandLine global .NET tool - dotnet tool install --global SharpFuzz.CommandLine - displayName: setup - - - bash: | - # Workaround - # https://github.com/Microsoft/azure-pipelines-tasks/issues/8291#issuecomment-441707116 - export PATH="$PATH:$HOME/.dotnet/tools" - - cd fuzz - ./init.sh - displayName: init - - - bash: | - echo "------------------------------------------------" - echo "writing random data (encoded) to testcases/4.dat" - echo "------------------------------------------------" - - cd fuzz - head /dev/urandom | base64 | tee testcases/4.dat - displayName: 'more entropy for fuzz' - - - bash: | - if [[ "$DOTNET_ENABLEAVX" == "0" ]]; then - export DOTNET_EnableAVX=0 - fi - - if [[ "$DOTNET_ENABLESSE" == "0" ]]; then - export DOTNET_EnableSSE=0 - fi - - cd fuzz - - # TIMEOUT variable defined in pipeline-UI (e.g. 350m = 21000s) - ./run.sh libFuzzer $(TIMEOUT) ${{ parameters.method }} - displayName: run - - - task: PublishBuildArtifacts@1 - condition: failed() - inputs: - pathtoPublish: 'fuzz/findings/crashes' - artifactName: '${{ parameters.method }}' + - template: steps/fuzz.yml + parameters: + method: ${{ parameters.method }} diff --git a/.azure/pipelines/jobs/steps/fuzz.yml b/.azure/pipelines/jobs/steps/fuzz.yml new file mode 100644 index 0000000..2fd9e8e --- /dev/null +++ b/.azure/pipelines/jobs/steps/fuzz.yml @@ -0,0 +1,67 @@ +parameters: + method: '' + +steps: + # ~SDKs already installed~ + - template: ./dotnet-install.yml + # Needed for sharpfuzz + - task: UseDotNet@2 + displayName: 'Use .NET SDK 6.0' + inputs: + version: 6.x + includePreviewVersions: true + installationPath: $(Agent.ToolsDirectory)/dotnet + + - bash: | + cd fuzz + + # -L to follow the redirect + curl -Lo libfuzzer-dotnet https://github.com/gfoidl/libfuzzer-dotnet/releases/download/v1.1.0-preview-1/libfuzzer-dotnet + + chmod +x *.sh + chmod +x libfuzzer-dotnet + displayName: pre-requisites + + - bash: | + # Install SharpFuzz.CommandLine global .NET tool + dotnet tool install 
--global SharpFuzz.CommandLine + displayName: setup + + - bash: | + # Workaround + # https://github.com/Microsoft/azure-pipelines-tasks/issues/8291#issuecomment-441707116 + export PATH="$PATH:$HOME/.dotnet/tools" + + cd fuzz + ./init.sh + displayName: init + + - bash: | + echo "------------------------------------------------" + echo "writing random data (encoded) to testcases/4.dat" + echo "------------------------------------------------" + + cd fuzz + head /dev/urandom | base64 | tee testcases/4.dat + displayName: 'more entropy for fuzz' + + - bash: | + if [[ "$DOTNET_ENABLEAVX" == "0" ]]; then + export DOTNET_EnableAVX=0 + fi + + if [[ "$DOTNET_ENABLESSE" == "0" ]]; then + export DOTNET_EnableSSE=0 + fi + + cd fuzz + + # TIMEOUT variable defined in pipeline-UI (e.g. 350m = 21000s) + ./run.sh libFuzzer $(TIMEOUT) ${{ parameters.method }} + displayName: run + + - task: PublishBuildArtifacts@1 + condition: failed() + inputs: + pathtoPublish: 'fuzz/findings/crashes' + artifactName: '${{ parameters.method }}' diff --git a/fuzz/gfoidl.Base64.FuzzTests/AflFuzzImpl.cs b/fuzz/gfoidl.Base64.FuzzTests/AflFuzzImpl.cs index 1eac4e3..b4ec2d4 100644 --- a/fuzz/gfoidl.Base64.FuzzTests/AflFuzzImpl.cs +++ b/fuzz/gfoidl.Base64.FuzzTests/AflFuzzImpl.cs @@ -6,9 +6,14 @@ namespace gfoidl.Base64.FuzzTests; internal sealed class AflFuzzImpl : FuzzImpl { - public override void Base64_Default_Decode() => Fuzzer.Run(Base64_Default_Decode); - public override void Base64_Url_Decode () => Fuzzer.Run(Base64_Url_Decode); + public override void Base64_Default_Decode(WhitespaceMode whitespaceMode) + => Fuzzer.Run(base64Text => Base64_Default_Decode(base64Text, whitespaceMode)); + public override void Base64_Url_Decode(WhitespaceMode whitespaceMode) + => Fuzzer.Run(base64Text => Base64_Url_Decode(base64Text, whitespaceMode)); //------------------------------------------------------------------------- - private static void Base64_Default_Decode(string base64Text) => Base64_Decode(base64Text, 
Base64.Default); - private static void Base64_Url_Decode (string base64Text) => Base64_Decode(base64Text, Base64.Url); + private static void Base64_Default_Decode(string base64Text, WhitespaceMode whitespaceMode) + => Base64_Decode(base64Text, Base64.Default, whitespaceMode); + + private static void Base64_Url_Decode(string base64Text, WhitespaceMode whitespaceMode) + => Base64_Decode(base64Text, Base64.Url, whitespaceMode); } diff --git a/fuzz/gfoidl.Base64.FuzzTests/FuzzImpl.cs b/fuzz/gfoidl.Base64.FuzzTests/FuzzImpl.cs index 3bbdde3..ac6f550 100644 --- a/fuzz/gfoidl.Base64.FuzzTests/FuzzImpl.cs +++ b/fuzz/gfoidl.Base64.FuzzTests/FuzzImpl.cs @@ -8,11 +8,11 @@ namespace gfoidl.Base64.FuzzTests; internal abstract class FuzzImpl { - public abstract void Base64_Default_Decode(); - public abstract void Base64_Url_Decode (); + public abstract void Base64_Default_Decode(WhitespaceMode whitespaceMode); + public abstract void Base64_Url_Decode (WhitespaceMode whitespaceMode); //------------------------------------------------------------------------- [SkipLocalsInit] - protected static void Base64_Decode(ReadOnlySpan base64Text, Base64 encoder) + protected static void Base64_Decode(ReadOnlySpan base64Text, Base64 encoder, WhitespaceMode whitespaceMode) { byte[]? dataArrayFromPool = null; try @@ -22,16 +22,22 @@ protected static void Base64_Decode(ReadOnlySpan base64Text, Base64 encode ? 
stackalloc byte[256] : dataArrayFromPool = ArrayPool.Shared.Rent(maxDecodedLength); - OperationStatus status = encoder.Decode(base64Text, data, out int consumed, out int written); + OperationStatus status = encoder.Decode(base64Text, data, out int consumed, out int written, whitespaceMode); - if (ContainsInvalidData(base64Text, encoder) && status != OperationStatus.InvalidData) + int idxOfInvalidData = IndexOfInvalidData(base64Text, encoder, whitespaceMode); + if (idxOfInvalidData >= 0 && status != OperationStatus.InvalidData) { - throw new Exception("contains invalid data -- not detected"); + throw new Exception($""" + contains invalid data -- not detected + status: {status} + idx: {idxOfInvalidData} + value: {base64Text[idxOfInvalidData]} + """); } if (status == OperationStatus.Done && consumed != base64Text.Length) { - throw new Exception("consumed != encoded.Length"); + throw new Exception($"consumed ({consumed}) != encoded.Length ({base64Text.Length})"); } } catch (ArgumentOutOfRangeException) @@ -51,34 +57,80 @@ protected static void Base64_Decode(ReadOnlySpan base64Text, Base64 encode } } //------------------------------------------------------------------------- - private static bool ContainsInvalidData(ReadOnlySpan base64Text, Base64 encoder) + private static int IndexOfInvalidData(ReadOnlySpan base64Text, Base64 encoder, WhitespaceMode whitespaceMode) { - ReadOnlySpan decodingMap = default; + ReadOnlySpan decodingMap; if (encoder is Base64Impl) { decodingMap = Base64Encoding.DecodingMap; - // Check for padding at the end - int paddingCount = 0; - - if (base64Text.Length > 1 && base64Text[^1] == Constants.EncodingPadChar) paddingCount++; - if (base64Text.Length > 2 && base64Text[^2] == Constants.EncodingPadChar) paddingCount++; - - base64Text = base64Text[0..^paddingCount]; + // Trim padding at the end + int paddingCountInclFinalWhitespace = GetPaddingCountInclFinalWhitespace(base64Text, whitespaceMode); + base64Text = 
base64Text[0..^paddingCountInclFinalWhitespace]; } else if (encoder is Base64Impl) { decodingMap = Base64UrlEncoding.DecodingMap; } + else + { + throw new NotSupportedException(); + } for (int i = 0; i < base64Text.Length; ++i) { char ch = base64Text[i]; - if (ch > 0x7F || decodingMap[ch] == -1) return true; + if (whitespaceMode == WhitespaceMode.Allow && IsAllowedWhitespace(ch)) + { + continue; + } + + if (ch > 0x7F || decodingMap[ch] == -1) return i; } - return false; + return -1; + } + //------------------------------------------------------------------------- + private static int GetPaddingCountInclFinalWhitespace(ReadOnlySpan base64Text, WhitespaceMode whitespaceMode) + { + // Check for padding at the end + int paddingCount = 0; + + if (whitespaceMode == WhitespaceMode.None) + { + if (base64Text.Length > 1 && base64Text[^1] == Constants.EncodingPadChar) paddingCount++; + if (base64Text.Length > 2 && base64Text[^2] == Constants.EncodingPadChar) paddingCount++; + } + else if (whitespaceMode == WhitespaceMode.Allow) + { + for (int i = base64Text.Length - 1; i >= 0; --i) + { + char ch = base64Text[i]; + + if (IsAllowedWhitespace(ch) || ch == Constants.EncodingPadChar) + { + paddingCount++; + } + else + { + break; + } + } + } + else + { + throw new NotSupportedException($"Unknown mode: {whitespaceMode}"); + } + + return paddingCount; + } + //------------------------------------------------------------------------- + private static readonly char[] s_allowedWhitespace = { '\t', '\n', '\r', ' ' }; + + private static bool IsAllowedWhitespace(char ch) + { + return s_allowedWhitespace.AsSpan().IndexOf(ch) >= 0; } } diff --git a/fuzz/gfoidl.Base64.FuzzTests/LibFuzzerFuzzImpl.cs b/fuzz/gfoidl.Base64.FuzzTests/LibFuzzerFuzzImpl.cs index 6844c0e..9777437 100644 --- a/fuzz/gfoidl.Base64.FuzzTests/LibFuzzerFuzzImpl.cs +++ b/fuzz/gfoidl.Base64.FuzzTests/LibFuzzerFuzzImpl.cs @@ -8,13 +8,19 @@ namespace gfoidl.Base64.FuzzTests; internal sealed class LibFuzzerFuzzImpl : FuzzImpl { 
- public override void Base64_Default_Decode() => Fuzzer.LibFuzzer.Run(Base64_Default_Decode); - public override void Base64_Url_Decode() => Fuzzer.LibFuzzer.Run(Base64_Url_Decode); + public override void Base64_Default_Decode(WhitespaceMode whitespaceMode) + => Fuzzer.LibFuzzer.Run(base64 => Base64_Default_Decode(base64, whitespaceMode)); + + public override void Base64_Url_Decode(WhitespaceMode whitespaceMode) + => Fuzzer.LibFuzzer.Run(base64 => Base64_Url_Decode(base64, whitespaceMode)); //------------------------------------------------------------------------- - private static void Base64_Default_Decode(ReadOnlySpan base64) => Base64_DecodeCore(base64, Base64.Default); - private static void Base64_Url_Decode (ReadOnlySpan base64) => Base64_DecodeCore(base64, Base64.Url); + private static void Base64_Default_Decode(ReadOnlySpan base64, WhitespaceMode whitespaceMode) + => Base64_DecodeCore(base64, Base64.Default, whitespaceMode); + + private static void Base64_Url_Decode(ReadOnlySpan base64, WhitespaceMode whitespaceMode) + => Base64_DecodeCore(base64, Base64.Url, whitespaceMode); //------------------------------------------------------------------------- - private static void Base64_DecodeCore(ReadOnlySpan base64, Base64 encoder) + private static void Base64_DecodeCore(ReadOnlySpan base64, Base64 encoder, WhitespaceMode whitespaceMode) { char[]? 
arrayFromPool = null; @@ -26,9 +32,9 @@ private static void Base64_DecodeCore(ReadOnlySpan base64, Base64 encoder) : arrayFromPool = ArrayPool.Shared.Rent(maxChars); int written = Encoding.UTF8.GetChars(base64, chars); - chars = chars[..written]; + chars = chars[..written]; - Base64_Decode(chars, encoder); + Base64_Decode(chars, encoder, whitespaceMode); } finally { diff --git a/fuzz/gfoidl.Base64.FuzzTests/Program.cs b/fuzz/gfoidl.Base64.FuzzTests/Program.cs index 19948c9..f81c0d2 100644 --- a/fuzz/gfoidl.Base64.FuzzTests/Program.cs +++ b/fuzz/gfoidl.Base64.FuzzTests/Program.cs @@ -18,7 +18,9 @@ As first arg the fuzzing method (sut) must be given. List of supported suts: - Base64_Default_Decode + - Base64_Default_Decode_Whitespace - Base64_Url_Decode + - Base64_Url_Decode_Whitespace Or 'PrintMachineInfo' to display useful info about the machine, like enabled vectorization, etc. @@ -38,8 +40,10 @@ As first arg the fuzzing method (sut) must be given. switch (args[0]) { - case "Base64_Default_Decode": fuzzImpl.Base64_Default_Decode(); break; - case "Base64_Url_Decode" : fuzzImpl.Base64_Url_Decode() ; break; + case "Base64_Default_Decode" : fuzzImpl.Base64_Default_Decode(WhitespaceMode.None) ; break; + case "Base64_Url_Decode_Whitespace" : fuzzImpl.Base64_Url_Decode (WhitespaceMode.Allow); break; + case "Base64_Url_Decode" : fuzzImpl.Base64_Url_Decode (WhitespaceMode.None) ; break; + case "Base64_Default_Decode_Whitespace": fuzzImpl.Base64_Default_Decode(WhitespaceMode.Allow); break; default: Console.WriteLine($"Unknown fuzzing function: {args[0]}"); Environment.Exit(2); diff --git a/fuzz/run.sh b/fuzz/run.sh index 3a91013..cad91ea 100644 --- a/fuzz/run.sh +++ b/fuzz/run.sh @@ -9,11 +9,13 @@ path="gfoidl.Base64.FuzzTests/bin/Release/$tfm" "$path"/gfoidl.Base64.FuzzTests "PrintMachineInfo" if [[ $# -lt 3 ]]; then - echo "first arg must be the fuzz type, either 'afl' or 'libfuzzer'" + echo "first arg must be the fuzz type, either 'afl' or 'libFuzzer'" echo "second arg 
must be duration for timeout (in seconds)" echo "third arg must be the fuzz-function" echo " - Base64_Default_Decode" + echo " - Base64_Default_Decode_Whitespace" echo " - Base64_Url_Decode" + echo " - Base64_Url_Decode_Whitespace" exit 1 fi diff --git a/gfoidl.Base64.sln b/gfoidl.Base64.sln index 18924d0..230ff8c 100644 --- a/gfoidl.Base64.sln +++ b/gfoidl.Base64.sln @@ -60,6 +60,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "jobs", "jobs", "{BBE8166F-3 .azure\pipelines\jobs\deploy_docs.yml = .azure\pipelines\jobs\deploy_docs.yml .azure\pipelines\jobs\deploy_nuget.yml = .azure\pipelines\jobs\deploy_nuget.yml .azure\pipelines\jobs\fuzzing-afl.yml = .azure\pipelines\jobs\fuzzing-afl.yml + .azure\pipelines\jobs\fuzzing-libFuzzer-no-matrix.yml = .azure\pipelines\jobs\fuzzing-libFuzzer-no-matrix.yml .azure\pipelines\jobs\fuzzing-libFuzzer.yml = .azure\pipelines\jobs\fuzzing-libFuzzer.yml EndProjectSection EndProject @@ -125,6 +126,7 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "steps", "steps", "{66441B78-E744-459A-AC57-E0CDD6642E50}" ProjectSection(SolutionItems) = preProject .azure\pipelines\jobs\steps\dotnet-install.yml = .azure\pipelines\jobs\steps\dotnet-install.yml + .azure\pipelines\jobs\steps\fuzz.yml = .azure\pipelines\jobs\steps\fuzz.yml EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "gfoidl.Base64.WebDemo", "demo\gfoidl.Base64.WebDemo\gfoidl.Base64.WebDemo.csproj", "{1FED75D7-CD44-4409-8BBA-F4E287B26291}" diff --git a/source/gfoidl.Base64/Base64.cs b/source/gfoidl.Base64/Base64.cs index 7c30127..144c43f 100644 --- a/source/gfoidl.Base64/Base64.cs +++ b/source/gfoidl.Base64/Base64.cs @@ -78,6 +78,24 @@ public abstract OperationStatus Decode( bool isFinalBlock = true); //------------------------------------------------------------------------- /// + public abstract OperationStatus Decode( + ReadOnlySpan encoded, + Span data, + out int consumed, + out int written, + WhitespaceMode whitespaceMode, 
+ bool isFinalBlock = true); + //------------------------------------------------------------------------- + /// + public abstract OperationStatus Decode( + ReadOnlySpan encoded, + Span data, + out int consumed, + out int written, + WhitespaceMode whitespaceMode, + bool isFinalBlock = true); + //------------------------------------------------------------------------- + /// public abstract string Encode(ReadOnlySpan data); //------------------------------------------------------------------------- /// diff --git a/source/gfoidl.Base64/IBase64.cs b/source/gfoidl.Base64/IBase64.cs index dcf40a0..d188a2f 100644 --- a/source/gfoidl.Base64/IBase64.cs +++ b/source/gfoidl.Base64/IBase64.cs @@ -231,6 +231,100 @@ OperationStatus Decode( bool isFinalBlock = true); //------------------------------------------------------------------------- /// + /// Base64 decodes . + /// + /// The base64 encoded data. + /// The base64 decoded data. + /// + /// The number of input bytes consumed during the operation. This can be used to slice the input for + /// subsequent calls, if necessary. + /// + /// + /// The number of bytes written into the output span. This can be used to slice the output for + /// subsequent calls, if necessary. + /// + /// + /// true (default) when the input span contains the entire data to decode. + /// Set to false only if it is known that the input span contains partial data with more data to follow. + /// + /// + /// Defines how whitespace (HTAB, LF, CR, SPACE) is treated.
+ /// + /// + /// It returns the OperationStatus enum values: + /// + /// Done - on successful processing of the entire input span + /// DestinationTooSmall - if there is not enough space in the output span to fit the decoded input + /// + /// NeedMoreData - only if isFinalBlock is false and the input is not a multiple of 4, otherwise the partial input + /// would be considered as InvalidData + /// + /// + /// InvalidData - if the input contains bytes outside of the expected base64 range, or if it contains invalid/more + /// than two padding characters, or if the input is incomplete (i.e. not a multiple of 4) and isFinalBlock is true. + /// + /// + /// + /// + /// Thrown for when the length is not conforming the base64Url standard. + /// set to false won't throw this exception. + /// + OperationStatus Decode( + ReadOnlySpan encoded, + Span data, + out int consumed, + out int written, + WhitespaceMode whitespaceMode, + bool isFinalBlock = true); + //------------------------------------------------------------------------- + /// + /// Base64 decodes . + /// + /// The base64 encoded data. + /// The base64 decoded data. + /// + /// The number of input chars consumed during the operation. This can be used to slice the input for + /// subsequent calls, if necessary. + /// + /// + /// The number of bytes written into the output span. This can be used to slice the output for + /// subsequent calls, if necessary. + /// + /// + /// true (default) when the input span contains the entire data to decode. + /// Set to false only if it is known that the input span contains partial data with more data to follow. + /// + /// + /// Defines how whitespace (HTAB, LF, CR, SPACE) is treated.
+ /// + /// + /// It returns the OperationStatus enum values: + /// + /// Done - on successful processing of the entire input span + /// DestinationTooSmall - if there is not enough space in the output span to fit the decoded input + /// + /// NeedMoreData - only if isFinalBlock is false and the input is not a multiple of 4, otherwise the partial input + /// would be considered as InvalidData + /// + /// + /// InvalidData - if the input contains chars outside of the expected base64 range, or if it contains invalid/more + /// than two padding characters, or if the input is incomplete (i.e. not a multiple of 4) and isFinalBlock is true. + /// + /// + /// + /// + /// Thrown for when the length is not conforming the base64Url standard. + /// set to false won't throw this exception. + /// + OperationStatus Decode( + ReadOnlySpan encoded, + Span data, + out int consumed, + out int written, + WhitespaceMode whitespaceMode, + bool isFinalBlock = true); + //------------------------------------------------------------------------- + /// /// Base64 encoded to a . /// /// The data to be base64 encoded. @@ -255,15 +349,5 @@ OperationStatus Decode( /// The input is not a valid Base64 string as it contains a non-base 64 character, /// more than two padding characters, or an illegal character among the padding characters. /// - /// - /// For base64 decoding on .NET Full or .NET Standard perf-wise it may be - /// better to use or - /// if possible.
- /// Please benchmark yourself. - /// - /// base64Url or .NET Core is not affected by this. - /// - ///
- /// byte[] Decode(ReadOnlySpan encoded); } diff --git a/source/gfoidl.Base64/Internal/Base64Impl.Decode.cs b/source/gfoidl.Base64/Internal/Base64Impl.Decode.cs index 48660c5..002ab04 100644 --- a/source/gfoidl.Base64/Internal/Base64Impl.Decode.cs +++ b/source/gfoidl.Base64/Internal/Base64Impl.Decode.cs @@ -146,7 +146,13 @@ ref Unsafe.Add(ref src, sourceIndex), // Handle last four bytes. There are 0, 1, 2 padding chars. ref T lastFourStart = ref Unsafe.Add(ref src, srcLength - 4); - if (paddingCount == 0) + // We have the paddingCount, but this is determined from the end. + // In order to detect invalid data, and decode only up to the last valid block, for base64 (which has padding) + // we need to check for padding starting at the last block. + // For base64Url we can re-use the already determined padding. + + // If last isn't padding, so decode 4 + if ((TBase64Encoder.HasPadding && !TOperation.IsPadding(Unsafe.Add(ref lastFourStart, 3))) || paddingCount == 0) { int result = DecodeFour(ref lastFourStart, ref decodingMap); @@ -157,7 +163,8 @@ ref Unsafe.Add(ref src, sourceIndex), sourceIndex += 4; destIndex += 3; } - else if (paddingCount == 1) + // If penultimate isn't padding, so decode 3 + else if ((TBase64Encoder.HasPadding && !TOperation.IsPadding(Unsafe.Add(ref lastFourStart, 2))) || paddingCount == 1) { int result = DecodeThree(ref lastFourStart, ref decodingMap); diff --git a/source/gfoidl.Base64/Internal/Base64Impl.DecodeWhitespace.cs b/source/gfoidl.Base64/Internal/Base64Impl.DecodeWhitespace.cs new file mode 100644 index 0000000..fb8c4c8 --- /dev/null +++ b/source/gfoidl.Base64/Internal/Base64Impl.DecodeWhitespace.cs @@ -0,0 +1,198 @@ +// (c) gfoidl, all rights reserved + +using System.Buffers; +using System.Runtime.CompilerServices; + +namespace gfoidl.Base64.Internal; + +partial class Base64Impl +{ + public override OperationStatus Decode(ReadOnlySpan encoded, Span data, out int consumed, out int written, WhitespaceMode whitespaceMode, bool 
isFinalBlock = true) + { + return whitespaceMode == WhitespaceMode.None + ? Decode (encoded, data, out consumed, out written, isFinalBlock) + : DecodeWithWithespace(encoded, data, out consumed, out written, isFinalBlock); + } + //------------------------------------------------------------------------- + public override OperationStatus Decode(ReadOnlySpan encoded, Span data, out int consumed, out int written, WhitespaceMode whitespaceMode, bool isFinalBlock = true) + { + return whitespaceMode == WhitespaceMode.None + ? Decode (encoded, data, out consumed, out written, isFinalBlock) + : DecodeWithWithespace(encoded, data, out consumed, out written, isFinalBlock); + } + //------------------------------------------------------------------------- + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static OperationStatus DecodeWithWithespace( + ReadOnlySpan encoded, + Span dataBuffer, + out int consumed, + out int written, + bool isFinalBlock = true) + where T : unmanaged + where TOperation : IOperation + { + OperationStatus status; + consumed = 0; + written = 0; + + while (true) + { + status = Decode(encoded, dataBuffer, out int localConsumed, out int localWritten, isFinalBlock); + consumed += localConsumed; + written += localWritten; + + if (status is not (OperationStatus.InvalidData or OperationStatus.DestinationTooSmall)) + { + break; + } + + encoded = encoded .Slice(localConsumed); + dataBuffer = dataBuffer.Slice(localWritten); + + if (!TrySkipWhitespace(encoded, out localConsumed)) + { + if (localConsumed > 0) + { + consumed += localConsumed; + status = OperationStatus.Done; + } + + break; + } + + if (localConsumed == 0) + { + // First char isn't whitespace, but we didn't consume anything, thus + // the input may have whitespace anywhere in between. So fall back to + // block-wise decoding. 
+ return DecodeWithWithespaceBlockwise(encoded, dataBuffer, out consumed, out written, isFinalBlock); + } + + encoded = encoded.Slice(localConsumed); + consumed += localConsumed; + } + + return status; + //--------------------------------------------------------------------- + static bool TrySkipWhitespace(ReadOnlySpan encoded, out int consumed) + { + int i = 0; + + for (; i < encoded.Length; ++i) + { + if (!TOperation.IsWhitespace(encoded[i])) + { + consumed = i; + return true; + } + } + + consumed = i; + return false; + } + } + //------------------------------------------------------------------------- + private static OperationStatus DecodeWithWithespaceBlockwise( + ReadOnlySpan encoded, + Span dataBuffer, + out int consumed, + out int written, + bool isFinalBlock) + where T : unmanaged + where TOperation : IOperation + { + // consumed and written are deliberately NOT reset here: they alias the caller's out-variables, which + // already hold the progress made so far, and are only incremented below. + // SkipInit merely satisfies the compiler's definite-assignment rule for out parameters. + Unsafe.SkipInit(out consumed); + Unsafe.SkipInit(out written); + + const int BlockSize = 4; + Span buffer = stackalloc T[BlockSize]; + OperationStatus status = OperationStatus.Done; + + while (!encoded.IsEmpty) + { + int encodedIdx = 0; + int bufferIdx = 0; + int skipped = 0; + + for (; encodedIdx < encoded.Length && (uint)bufferIdx < (uint)buffer.Length; ++encodedIdx) + { + if (TOperation.IsWhitespace(encoded[encodedIdx])) + { + skipped++; + } + else + { + buffer[bufferIdx] = encoded[encodedIdx]; + bufferIdx++; + } + } + + encoded = encoded.Slice(encodedIdx); + consumed += skipped; + + if (bufferIdx == 0) + { + continue; + } + + bool hasAnotherBlock, localIsFinalBlock; + + if (TBase64Encoder.HasPadding) + { + hasAnotherBlock = encoded.Length >= BlockSize && bufferIdx == BlockSize; + localIsFinalBlock = !hasAnotherBlock; + + // If this block contains padding and there's another block, then only whitespace may follow + // for being valid.
+ if (hasAnotherBlock) + { + int paddingCount = TOperation.GetPaddingCount(ref buffer[^1]); + if (paddingCount > 0) + { + hasAnotherBlock = false; + localIsFinalBlock = true; + } + } + } + else + { + hasAnotherBlock = !encoded.IsEmpty; + localIsFinalBlock = !hasAnotherBlock; + } + + if (localIsFinalBlock && !isFinalBlock) + { + localIsFinalBlock = false; + } + + status = Decode(buffer.Slice(0, bufferIdx), dataBuffer, out int localConsumed, out int localWritten, localIsFinalBlock); + consumed += localConsumed; + written += localWritten; + dataBuffer = dataBuffer.Slice(localWritten); + + if (status != OperationStatus.Done) + { + return status; + } + + // The remaining data must all be whitespace in order to be valid + if (!hasAnotherBlock) + { + for (int i = 0; i < encoded.Length; ++i) + { + if (!TOperation.IsWhitespace(encoded[i])) + { + return OperationStatus.InvalidData; + } + + consumed++; + } + + break; + } + } + + return status; + } +} diff --git a/source/gfoidl.Base64/Internal/Operations/ByteOperation.cs b/source/gfoidl.Base64/Internal/Operations/ByteOperation.cs index de6b0c5..79365c5 100644 --- a/source/gfoidl.Base64/Internal/Operations/ByteOperation.cs +++ b/source/gfoidl.Base64/Internal/Operations/ByteOperation.cs @@ -104,4 +104,17 @@ public static int GetPaddingCount(ref byte ptrToLastElement) return padding; } + //------------------------------------------------------------------------- + [DebuggerStepThrough] + public static bool IsPadding(byte value) => value == Constants.EncodingPadByte; + //------------------------------------------------------------------------- + [DebuggerStepThrough] + public static bool IsWhitespace(byte value) => CommonOperation.IsWhitespace(value); + //------------------------------------------------------------------------- + [DebuggerStepThrough] + public static bool IsInvalid(byte value, ReadOnlySpan decodingMap, bool ignorePadding = true, bool ignoreWhitespace = true) + { + return decodingMap[value] < 0 + && 
!((ignorePadding && IsPadding(value)) || (ignoreWhitespace && IsWhitespace(value))); + } } diff --git a/source/gfoidl.Base64/Internal/Operations/CharOperation.cs b/source/gfoidl.Base64/Internal/Operations/CharOperation.cs index 539db66..6c7da45 100644 --- a/source/gfoidl.Base64/Internal/Operations/CharOperation.cs +++ b/source/gfoidl.Base64/Internal/Operations/CharOperation.cs @@ -213,4 +213,17 @@ public static int GetPaddingCount(ref char ptrToLastElement) return padding; } + //------------------------------------------------------------------------- + [DebuggerStepThrough] + public static bool IsPadding(char value) => value == Constants.EncodingPadChar; + //------------------------------------------------------------------------- + [DebuggerStepThrough] + public static bool IsWhitespace(char value) => CommonOperation.IsWhitespace(value); + //------------------------------------------------------------------------- + [DebuggerStepThrough] + public static bool IsInvalid(char value, ReadOnlySpan decodingMap, bool ignorePadding = true, bool ignoreWhitespace = true) + { + return (value >= decodingMap.Length || decodingMap[value] < 0) + && !((ignorePadding && IsPadding(value)) || (ignoreWhitespace && IsWhitespace(value))); + } } diff --git a/source/gfoidl.Base64/Internal/Operations/CommonOperation.cs b/source/gfoidl.Base64/Internal/Operations/CommonOperation.cs new file mode 100644 index 0000000..9f68c29 --- /dev/null +++ b/source/gfoidl.Base64/Internal/Operations/CommonOperation.cs @@ -0,0 +1,52 @@ +// (c) gfoidl, all rights reserved + +using System.Runtime.CompilerServices; + +namespace gfoidl.Base64.Internal; + +internal readonly struct CommonOperation +{ + [DebuggerStepThrough] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsWhitespace(int value) + { + if (Environment.Is64BitProcess) + { + // For description see 
https://github.com/dotnet/runtime/blob/48e74187cb15386c29eedaa046a5ee2c7ddef161/src/libraries/Common/src/System/HexConverter.cs#L314-L330 + /* Constant created with + using System; + + string validValues = "\t\n\r "; + ulong magic = 0; + + foreach (char c in validValues) + { + int idx = c - '\t'; // lowest value of allowed set + + magic |= 1UL << (64 - 1 - idx); + } + + Console.WriteLine(magic); + Console.WriteLine($"0x{magic:X16}"); + */ + + const ulong MagicConstant = 0xC800010000000000UL; + + ulong i = (uint)value - '\t'; + ulong shift = MagicConstant << (int)i; + ulong mask = i - 64; + + return (long)(shift & mask) < 0; + } + else + { + if (value < 32) + { + const int BitMask = (1 << 9) | (1 << 10) | (1 << 13); + return ((1 << value) & BitMask) != 0; + } + + return value == 32; + } + } +} diff --git a/source/gfoidl.Base64/Internal/Operations/IOperation.cs b/source/gfoidl.Base64/Internal/Operations/IOperation.cs index d03dbb8..e45b558 100644 --- a/source/gfoidl.Base64/Internal/Operations/IOperation.cs +++ b/source/gfoidl.Base64/Internal/Operations/IOperation.cs @@ -19,4 +19,7 @@ internal interface IOperation where T : struct static abstract nuint Write1EncodedByte (ref T dest, uint i0, uint i1, bool addPadding); //------------------------------------------------------------------------- static abstract int GetPaddingCount(ref T ptrToLastElement); + static abstract bool IsPadding(T value); + static abstract bool IsWhitespace(T value); + static abstract bool IsInvalid(T value, ReadOnlySpan decodingMap, bool ignorePadding = true, bool ignoreWhitespace = true); } diff --git a/source/gfoidl.Base64/WhitespaceMode.cs b/source/gfoidl.Base64/WhitespaceMode.cs new file mode 100644 index 0000000..2c3b6b3 --- /dev/null +++ b/source/gfoidl.Base64/WhitespaceMode.cs @@ -0,0 +1,19 @@ +// (c) gfoidl, all rights reserved + +namespace gfoidl.Base64; + +/// +/// Defines how whitespace (HTAB, LF, CR, SPACE) is treated.
+/// +public enum WhitespaceMode +{ + /// + /// Treats whitespace as invalid input. + /// + None = 0, + + /// + /// Treats whitespace as valid input -- i.e. it's ignored / skipped over. + /// + Allow = 1 +} diff --git a/tests/gfoidl.Base64.Tests/FuzzVerification.cs b/tests/gfoidl.Base64.Tests/FuzzVerification.cs index 1c068b8..f3e3f15 100644 --- a/tests/gfoidl.Base64.Tests/FuzzVerification.cs +++ b/tests/gfoidl.Base64.Tests/FuzzVerification.cs @@ -43,11 +43,61 @@ public void Invalid_data_char(string fileName) Assert.AreEqual(OperationStatus.InvalidData, status); } //------------------------------------------------------------------------- - private static IEnumerable Invalid_data_TestCases() + [Test, TestCaseSource(nameof(Whitespace_TestCases))] + public void Whitespace_data_byte(string fileName) { - foreach (string file in Directory.EnumerateFiles(Path.Combine(TestContext.CurrentContext.TestDirectory, BasePath, "InvalidData"))) + byte[] encoded = File.ReadAllBytes(fileName); + +#if DEBUG + TestContext.WriteLine(Encoding.UTF8.GetString(encoded)); +#endif + + Span data = new byte[_encoder.GetMaxDecodedLength(encoded.Length)]; + OperationStatus status = _encoder.Decode(encoded, data, out int consumed, out int _, WhitespaceMode.Allow); + + if (status == OperationStatus.Done) + { + Assert.AreEqual(encoded.Length, consumed, nameof(consumed)); + } + else + { + Assert.AreEqual(OperationStatus.InvalidData, status); + } + } + //------------------------------------------------------------------------- + [Test, TestCaseSource(nameof(Whitespace_TestCases))] + public void Whitespace_data_char(string fileName) + { + string encoded = File.ReadAllText(fileName); + +#if DEBUG + TestContext.WriteLine(encoded); +#endif + + Span data = new byte[_encoder.GetMaxDecodedLength(encoded.Length)]; + OperationStatus status = _encoder.Decode(encoded, data, out int consumed, out int _, WhitespaceMode.Allow); + + if (status == OperationStatus.Done) + { + Assert.AreEqual(encoded.Length, consumed,
nameof(consumed)); + } + else + { + Assert.AreEqual(OperationStatus.InvalidData, status); + } + } + //------------------------------------------------------------------------- + private static IEnumerable Invalid_data_TestCases() => GetTestFiles("InvalidData"); + private static IEnumerable Whitespace_TestCases() => GetTestFiles("Whitespace"); + //------------------------------------------------------------------------- + private static IEnumerable GetTestFiles(string pathSegment) + { + foreach (string file in Directory.EnumerateFiles(Path.Combine(TestContext.CurrentContext.TestDirectory, BasePath, pathSegment))) { string fileName = Path.GetFileName(file); + + if (fileName == ".gitkeep") continue; + yield return new TestCaseData(file).SetArgDisplayNames(fileName); } } diff --git a/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/BasicDecoding.cs b/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/BasicDecoding.cs index 22dd04c..1865e4c 100644 --- a/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/BasicDecoding.cs +++ b/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/BasicDecoding.cs @@ -214,6 +214,9 @@ public void Basic_decoding_with_invalid_input___InvalidData(string input, int ex Assert.AreEqual(expectedConsumed , consumed); Assert.AreEqual(expectedWritten , written); + string expectedBase64 = Convert.ToBase64String(actualData); + StringAssert.StartsWith(expectedBase64, input); + byte[] expectedData = Convert.FromBase64String(input.Substring(0, consumed).FromBase64Url()); CollectionAssert.AreEqual(expectedData, actualData); }); @@ -230,6 +233,9 @@ private static IEnumerable Basic_decoding_with_invalid_input___Inv yield return new TestCaseData("z/TpH7sqEkerqMwe\u00ecH1uSw==", 16, 12); // ì yield return new TestCaseData("\ud83c\udf04a/TpH7sqEkerqMwelH1uSw", 0, 0); // 🌄 + yield return new TestCaseData("z/DpkeqEkel½=" , 8, 6); + yield return new TestCaseData("z/TpTpHlrMTpTp÷==", 12, 9); + if (typeof(TEncoder) == typeof(Base64Impl)) 
{ // These are valid for base64Url diff --git a/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/Whitespace.cs b/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/Whitespace.cs new file mode 100644 index 0000000..e7b3644 --- /dev/null +++ b/tests/gfoidl.Base64.Tests/Internal/Base64ImplTests/Decode/Whitespace.cs @@ -0,0 +1,329 @@ +// (c) gfoidl, all rights reserved + +using System.Buffers; +using System.Text; +using gfoidl.Base64.Internal; +using NUnit.Framework.Internal; + +namespace gfoidl.Base64.Tests.Internal.Base64ImplTests.Decode; + +[TestFixture(typeof(Base64Impl) , typeof(byte))] +[TestFixture(typeof(Base64Impl), typeof(byte))] +[TestFixture(typeof(Base64Impl) , typeof(char))] +[TestFixture(typeof(Base64Impl), typeof(char))] +public class Whitespace + where TEncoder : Base64, new() + where T : struct +{ + private readonly TEncoder _sut = new(); + //------------------------------------------------------------------------- + [Test, TestCaseSource(nameof(Whitespace_after_76_chars_TestCases))] + public void Whitespace_after_76_chars(string base64Text, int expectedConsumed, int expectedWritten) + { + OperationStatus status; + int consumed, written; + byte[] data = new byte[base64Text.Length]; + + if (typeof(T) == typeof(byte)) + { + byte[] base64 = Encoding.ASCII.GetBytes(base64Text); + status = _sut.Decode(base64, data, out consumed, out written, WhitespaceMode.Allow); + } + else if (typeof(T) == typeof(char)) + { + status = _sut.Decode(base64Text, data, out consumed, out written, WhitespaceMode.Allow); + } + else + { + throw new NotSupportedException(); // just in case new types are introduced in the future + } + + Assert.Multiple(() => + { + Assert.AreEqual(OperationStatus.Done, status); + Assert.AreEqual(expectedConsumed , consumed, nameof(consumed)); + Assert.AreEqual(expectedWritten , written , nameof(written)); + }); + } + //------------------------------------------------------------------------- + private static IEnumerable 
Whitespace_after_76_chars_TestCases() + { + byte[] data = new byte[200]; + TestContext.CurrentContext.Random.NextBytes(data); + + string base64Text = Convert.ToBase64String(data, Base64FormattingOptions.InsertLineBreaks); + + if (typeof(TEncoder) == typeof(Base64Impl)) + { + // Poor man's base64Url encoding + base64Text = base64Text.Replace('+', '-').Replace('/', '_').TrimEnd('='); + } + + yield return new TestCaseData(base64Text, base64Text.Length, data.Length).SetArgDisplayNames("default linebreaks"); + + base64Text = base64Text.Replace("\r\n", "\t"); + yield return new TestCaseData(base64Text, base64Text.Length, data.Length).SetArgDisplayNames("tab"); + + base64Text = base64Text.Replace("\t", " "); + yield return new TestCaseData(base64Text, base64Text.Length, data.Length).SetArgDisplayNames("single space"); + + base64Text = base64Text.Replace(" ", " "); + yield return new TestCaseData(base64Text, base64Text.Length, data.Length).SetArgDisplayNames("spaces"); + } + //------------------------------------------------------------------------- + [Test] + [TestCase(" ")] + [TestCase(" ")] + [TestCase("\t")] + [TestCase("\n")] + [TestCase("\r\n")] + public void Only_whitespace___valid(string base64Text) + { + OperationStatus status; + int consumed, written; + byte[] data = new byte[100]; + + if (typeof(T) == typeof(byte)) + { + byte[] base64 = Encoding.ASCII.GetBytes(base64Text); + status = _sut.Decode(base64, data, out consumed, out written, WhitespaceMode.Allow); + } + else if (typeof(T) == typeof(char)) + { + status = _sut.Decode(base64Text, data, out consumed, out written, WhitespaceMode.Allow); + } + else + { + throw new NotSupportedException(); // just in case new types are introduced in the future + } + + Assert.Multiple(() => + { + Assert.AreEqual(OperationStatus.Done, status); + Assert.AreEqual(base64Text.Length , consumed, nameof(consumed)); + Assert.AreEqual(0 , written , nameof(written)); + }); + } + 
//------------------------------------------------------------------------- + [Test, TestCaseSource(nameof(Non_well_formed_whitespaces_but_valid_TestCases))] + public void Non_well_formed_whitespaces_but_valid(string base64Text, int expectedWritten) + { + OperationStatus status; + int consumed, written; + byte[] data = new byte[100]; + + if (typeof(T) == typeof(byte)) + { + byte[] base64 = Encoding.ASCII.GetBytes(base64Text); + status = _sut.Decode(base64, data, out consumed, out written, WhitespaceMode.Allow); + } + else if (typeof(T) == typeof(char)) + { + status = _sut.Decode(base64Text, data, out consumed, out written, WhitespaceMode.Allow); + } + else + { + throw new NotSupportedException(); // just in case new types are introduced in the future + } + + Assert.Multiple(() => + { + Assert.AreEqual(OperationStatus.Done, status); + Assert.AreEqual(base64Text.Length , consumed, nameof(consumed)); + Assert.AreEqual(expectedWritten , written , nameof(written)); + }); + } + //------------------------------------------------------------------------- + private static IEnumerable Non_well_formed_whitespaces_but_valid_TestCases() + { + return CreateTestCases(Core()); + //--------------------------------------------------------------------- + static IEnumerable Core() + { + yield return new TestCaseData("YQ==" , 1); + yield return new TestCaseData(" YQ==" , 1); + yield return new TestCaseData("YQ== " , 1); + yield return new TestCaseData("YQ== " , 1); + yield return new TestCaseData("YQ ==" , 1); + yield return new TestCaseData("YQ ==" , 1); + yield return new TestCaseData("YQ ==" , 1); + yield return new TestCaseData("YQ= = " , 1); + yield return new TestCaseData("YQ = = " , 1); + yield return new TestCaseData("Y Q = = " , 1); + yield return new TestCaseData(" Y Q = = " , 1); + yield return new TestCaseData("\tY\nQ\r=\t=\n", 1); + + yield return new TestCaseData("YQB= " , 2); + yield return new TestCaseData("YQB =" , 2); + yield return new TestCaseData("YQB =" , 2); + 
yield return new TestCaseData("YQB =", 2); + + yield return new TestCaseData("c 1cyIQ==", 4); + } + } + //------------------------------------------------------------------------- + [Test, TestCaseSource(nameof(Invalid_with_whitespace_TestCases))] + public void Invalid_with_whitespace(string base64Text, int expectedConsumed, int expectedWritten) + { + OperationStatus status; + int consumed, written; + byte[] data = new byte[100]; + + if (typeof(T) == typeof(byte)) + { + byte[] base64 = Encoding.ASCII.GetBytes(base64Text); + status = _sut.Decode(base64, data, out consumed, out written, WhitespaceMode.Allow); + } + else if (typeof(T) == typeof(char)) + { + status = _sut.Decode(base64Text, data, out consumed, out written, WhitespaceMode.Allow); + } + else + { + throw new NotSupportedException(); // just in case new types are introduced in the future + } + + Assert.Multiple(() => + { + Assert.AreEqual(OperationStatus.InvalidData, status); + Assert.AreEqual(expectedConsumed , consumed, nameof(consumed)); + Assert.AreEqual(expectedWritten , written , nameof(written)); + }); + } + //------------------------------------------------------------------------- + private static IEnumerable Invalid_with_whitespace_TestCases() + { + return CreateTestCases(Core()); + //--------------------------------------------------------------------- + static IEnumerable Core() + { + if (typeof(TEncoder) != typeof(Base64Impl)) + { + // These are valid for base64Url if the = is removed! 
+ yield return new TestCaseData(" YQ == a", 7, 1); + yield return new TestCaseData(" YQ = a" , 3, 0); + + yield return new TestCaseData("=" , 0, 0); + yield return new TestCaseData("====ab" , 0, 0); + yield return new TestCaseData("====ab==", 0, 0); + + yield return new TestCaseData("z/DpkeqEkel½=" , 8, 6); + yield return new TestCaseData("z/TpTpHlrMTpTp÷==", 12, 9); + } + + yield return new TestCaseData("?abc" , 0, 0); + yield return new TestCaseData("?abc " , 0, 0); + yield return new TestCaseData(" ?abc" , 1, 0); + yield return new TestCaseData(" ?abc ", 1, 0); + } + } + //------------------------------------------------------------------------- + [Test] + public void Whitespace_at_various_places___valid_and_same_behavior_as_Convert_FromBase64() + { + byte[] data = new byte[20]; + TestContext.WriteLine("Random seed: {0}", Randomizer.InitialSeed); + TestContext.CurrentContext.Random.NextBytes(data); + + int testCount = 0; + + for (int i = 1; i < data.Length; ++i) + { + for (int j = 0; j < 20; ++j) + { + for (int k = 0; k <= 5; ++k) + { + testCount += Core(_sut, data.AsSpan(0, i), whitespaceInstances: j, mode: k); + } + } + } + + TestContext.WriteLine("Run {0} tests", testCount); + //--------------------------------------------------------------------- + static int Core(TEncoder sut, ReadOnlySpan origData, int whitespaceInstances, int mode) + { + string base64Text = Convert.ToBase64String(origData); + string base64TextOrig = base64Text; + + if (typeof(TEncoder) == typeof(Base64Impl)) + { + // Poor man's base64Url encoding + base64Text = base64Text.Replace('+', '-').Replace('/', '_').TrimEnd('='); + } + + string base64Copy = base64Text; + string spacesToInsert = new(' ', whitespaceInstances); + int count = 0; + byte[] data = new byte[1000]; + + for (int i = 0; i < base64Copy.Length; ++i) + { + count++; + data.AsSpan().Clear(); + + ReadOnlySpan firstSegment = base64Copy.AsSpan(0, i); + ReadOnlySpan secondSegment = base64Copy.AsSpan(i); + + base64Text = mode switch + 
{ + 0 => $"{firstSegment}{secondSegment}{spacesToInsert}", + 1 => $"{firstSegment}{spacesToInsert}{secondSegment}", + 2 => $"{spacesToInsert}{firstSegment}{secondSegment}", + 3 => $"{firstSegment}{spacesToInsert}{secondSegment}{spacesToInsert}", + 4 => $"{spacesToInsert}{firstSegment}{spacesToInsert}{secondSegment}", + 5 => $"{spacesToInsert}{firstSegment}{spacesToInsert}{secondSegment}{spacesToInsert}", + _ => throw new InvalidOperationException($"'mode' must be in interval [0, 5], given: {mode}") + }; + + OperationStatus status; + int consumed, written; + + if (typeof(T) == typeof(byte)) + { + byte[] base64 = Encoding.ASCII.GetBytes(base64Text); + status = sut.Decode(base64, data, out consumed, out written, WhitespaceMode.Allow); + } + else if (typeof(T) == typeof(char)) + { + status = sut.Decode(base64Text, data, out consumed, out written, WhitespaceMode.Allow); + } + else + { + throw new NotSupportedException(); // just in case new types are introduced in the future + } + + byte[] dataConvertFromBase64 = Convert.FromBase64String(base64TextOrig); + + Assert.Multiple(() => + { + Assert.AreEqual(OperationStatus.Done , status , "Failure for {1} with '{0}'", base64Text, nameof(status)); + Assert.AreEqual(base64Text.Length , consumed, "Failure for {1} with '{0}'", base64Text, nameof(consumed)); + Assert.AreEqual(dataConvertFromBase64.Length, written , "Failure for {1} with '{0}'", base64Text, nameof(written)); + }); + + string textFromConvertFromBase64 = Encoding.UTF8.GetString(dataConvertFromBase64); + string textFromBase64 = Encoding.UTF8.GetString(data.AsSpan(0, written)); + + Assert.AreEqual(textFromConvertFromBase64, textFromBase64); + } + + return count; + } + } + //------------------------------------------------------------------------- + private static IEnumerable CreateTestCases(IEnumerable source) + { + foreach (TestCaseData testCaseData in source) + { + if (typeof(TEncoder) == typeof(Base64Impl)) + { + string base64Text = testCaseData.Arguments[0] as 
string; + base64Text = base64Text.Replace("=", null); + testCaseData.Arguments[0] = base64Text; + testCaseData.OriginalArguments[0] = base64Text; + } + + yield return testCaseData; + } + } +} diff --git a/tests/gfoidl.Base64.Tests/Internal/OperationsTests/IsInvalid.cs b/tests/gfoidl.Base64.Tests/Internal/OperationsTests/IsInvalid.cs new file mode 100644 index 0000000..3f30193 --- /dev/null +++ b/tests/gfoidl.Base64.Tests/Internal/OperationsTests/IsInvalid.cs @@ -0,0 +1,56 @@ +// (c) gfoidl, all rights reserved + +using gfoidl.Base64.Internal; + +namespace gfoidl.Base64.Tests.Internal.OperationsTests; + +[TestFixture(typeof(Base64Encoding))] +[TestFixture(typeof(Base64UrlEncoding))] +internal class IsInvalid where TBase64Encoding : IBase64Encoding +{ + [Test] + public void Char_outside_decodingmap_size___true() + { + int value = TBase64Encoding.DecodingMap.Length; + + bool actual = CharOperation.IsInvalid((char)value, TBase64Encoding.DecodingMap); + + Assert.IsTrue(actual); + } + //------------------------------------------------------------------------- + [Test] + public void Byte_padding_char([Values(true, false)] bool ignorePadding) + { + bool actual = ByteOperation.IsInvalid((byte)'=', TBase64Encoding.DecodingMap, ignorePadding); + + Assert.AreEqual(!ignorePadding, actual); + } + //------------------------------------------------------------------------- + [Test] + public void Char_padding_char___false([Values(true, false)] bool ignorePadding) + { + bool actual = CharOperation.IsInvalid('=', TBase64Encoding.DecodingMap, ignorePadding); + + Assert.AreEqual(!ignorePadding, actual); + } + //------------------------------------------------------------------------- + [Test] + public void Byte_whitespace___false( + [Values('\t', '\n', '\r', ' ')] int value, + [Values(true, false)] bool ignoreWhitespace) + { + bool actual = ByteOperation.IsInvalid((byte)value, TBase64Encoding.DecodingMap, ignoreWhitespace: ignoreWhitespace); + + Assert.AreEqual(!ignoreWhitespace, 
actual); + } + //------------------------------------------------------------------------- + [Test] + public void Char_whitespace___false( + [Values('\t', '\n', '\r', ' ')] int value, + [Values(true, false)] bool ignoreWhitespace) + { + bool actual = CharOperation.IsInvalid((char)value, TBase64Encoding.DecodingMap, ignoreWhitespace: ignoreWhitespace); + + Assert.AreEqual(!ignoreWhitespace, actual); + } +} diff --git a/tests/gfoidl.Base64.Tests/Internal/OperationsTests/IsWhitespace.cs b/tests/gfoidl.Base64.Tests/Internal/OperationsTests/IsWhitespace.cs new file mode 100644 index 0000000..19745cb --- /dev/null +++ b/tests/gfoidl.Base64.Tests/Internal/OperationsTests/IsWhitespace.cs @@ -0,0 +1,59 @@ +// (c) gfoidl, all rights reserved + +using gfoidl.Base64.Internal; + +namespace gfoidl.Base64.Tests.Internal.OperationsTests; + +[TestFixture] +public class IsWhitespace +{ + private static readonly HashSet s_whitespace = new() { '\t', '\n', '\r', ' ' }; + //------------------------------------------------------------------------- + [Test] + public void Byte_whitespace_given___true() + { + foreach (int value in s_whitespace) + { + bool actual = ByteOperation.IsWhitespace((byte)value); + + Assert.IsTrue(actual, "Failure at {0}", value); + } + } + //------------------------------------------------------------------------- + [Test] + public void Char_whitespace_given___true() + { + foreach (int value in s_whitespace) + { + bool actual = CharOperation.IsWhitespace((char)value); + + Assert.IsTrue(actual, "Failure at {0}", value); + } + } + //------------------------------------------------------------------------- + [Test] + public void Byte_not_whitespace___false() + { + for (int value = byte.MinValue; value <= byte.MaxValue; ++value) + { + if (s_whitespace.Contains(value)) continue; + + bool actual = ByteOperation.IsWhitespace((byte)value); + + Assert.IsFalse(actual); + } + } + //------------------------------------------------------------------------- + [Test] + public 
void Char_not_whitespace___false() + { + for (int value = char.MinValue; value <= char.MaxValue; ++value) + { + if (s_whitespace.Contains(value)) continue; + + bool actual = CharOperation.IsWhitespace((char)value); + + Assert.IsFalse(actual); + } + } +} diff --git a/tests/gfoidl.Base64.Tests/data/fuzz-findings/decoding/Whitespace/crash-consumed_count b/tests/gfoidl.Base64.Tests/data/fuzz-findings/decoding/Whitespace/crash-consumed_count new file mode 100644 index 0000000..af3657f --- /dev/null +++ b/tests/gfoidl.Base64.Tests/data/fuzz-findings/decoding/Whitespace/crash-consumed_count @@ -0,0 +1,2 @@ + Tp+e Tpke8 pke + \ No newline at end of file diff --git a/tests/gfoidl.Base64.Tests/data/fuzz-findings/decoding/Whitespace/crash-padding_at_start b/tests/gfoidl.Base64.Tests/data/fuzz-findings/decoding/Whitespace/crash-padding_at_start new file mode 100644 index 0000000..3654a88 --- /dev/null +++ b/tests/gfoidl.Base64.Tests/data/fuzz-findings/decoding/Whitespace/crash-padding_at_start @@ -0,0 +1 @@ +=========EzqT#w \ No newline at end of file