From b5226fe18e2c7d03ac173fcba475e54269dabe0d Mon Sep 17 00:00:00 2001 From: Sebastien Ros Date: Fri, 15 Aug 2025 10:04:46 -0700 Subject: [PATCH] Add NoneOf parsers --- Parlot.sln | 12 +++- docs/parsers.md | 38 +++++++++++- src/Parlot/Fluent/ListOfCharsLiteral.cs | 6 +- src/Parlot/Fluent/Parsers.cs | 32 ++++++++-- src/Parlot/Fluent/SearchValuesCharLiteral.cs | 9 ++- test/Parlot.Tests/FluentTests.cs | 61 +++++++++++++++++++- 6 files changed, 142 insertions(+), 16 deletions(-) diff --git a/Parlot.sln b/Parlot.sln index 9dec1305..bd95ad78 100644 --- a/Parlot.sln +++ b/Parlot.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.1.31903.286 +# Visual Studio Version 18 +VisualStudioVersion = 18.0.10912.84 main MinimumVisualStudioVersion = 15.0.26124.0 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{0D1E6480-3C81-4951-8F44-BF74398BA8D4}" EndProject @@ -17,15 +17,21 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ProjectSection(SolutionItems) = preProject .editorconfig = .editorconfig .github\workflows\build.yml = .github\workflows\build.yml + Directory.Build.props = Directory.Build.props Directory.Packages.props = Directory.Packages.props NuGet.config = NuGet.config .github\workflows\publish.yml = .github\workflows\publish.yml README.md = README.md - Directory.Build.props = Directory.Build.props EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Samples", "src\Samples\Samples.csproj", "{B9A796FE-4BEB-499A-B506-25F20C749527}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}" + ProjectSection(SolutionItems) = preProject + docs\parsers.md = docs\parsers.md + docs\writing.md = docs\writing.md + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU diff --git a/docs/parsers.md b/docs/parsers.md index 849fb577..ac46b42d 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -85,7 +85,7 @@ Usage: ```c# var input = "hello world"; -var parser = Terms.Char("h"); +var parser = Terms.Char('h'); ``` Result: @@ -161,7 +161,7 @@ Result: ### String -Matches a quoted string literal, optionally use single or double enclosing quotes. +Matches a quoted string literal with escape sequences. Use this parser to parse strings from a programming language. ```c# Parser String(StringLiteralQuotes quotes = StringLiteralQuotes.SingleOrDouble) @@ -227,6 +227,12 @@ abab Matches any chars from a list of chars. +```c# +Parser AnyOf(string values, int minSize = 1, int maxSize = 0) +``` + +The following overloads are available when targeting .NET 8 or later and use vectorized parsing for better performance. + ```c# Parser AnyOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) Parser AnyOf(SearchValue searchValues, int minSize = 1, int maxSize = 0) @@ -245,6 +251,34 @@ Result: abab ``` +### NoneOf + +Matches any other chars than the ones specified. + +```c# +Parser NoneOf(string values, int minSize = 1, int maxSize = 0) +``` + +The following overloads are available when targeting .NET 8 or later and use vectorized parsing for better performance. + +```c# +Parser NoneOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) +Parser NoneOf(SearchValue searchValues, int minSize = 1, int maxSize = 0) +``` + +Usage: + +```c# +var input = "ababcad"; +var parser = Terms.NoneOf("cd"); +``` + +Result: + +``` +abab +``` + ## Combining parsers ### Or diff --git a/src/Parlot/Fluent/ListOfCharsLiteral.cs b/src/Parlot/Fluent/ListOfCharsLiteral.cs index be9d8a35..66c537af 100644 --- a/src/Parlot/Fluent/ListOfCharsLiteral.cs +++ b/src/Parlot/Fluent/ListOfCharsLiteral.cs @@ -9,6 +9,7 @@ internal sealed class ListOfChars : Parser, ISeekable private readonly CharMap _map = new(); private readonly int _minSize; private readonly int _maxSize; + private readonly bool _negate; private readonly bool _hasNewLine; public bool CanSeek { get; } @@ -17,7 +18,7 @@ internal sealed class ListOfChars : Parser, ISeekable public bool SkipWhitespace { get; } - public ListOfChars(string values, int minSize = 1, int maxSize = 0) + public ListOfChars(string values, int minSize = 1, int maxSize = 0, bool negate = false) { foreach (var c in values) { @@ -37,6 +38,7 @@ public ListOfChars(string values, int minSize = 1, int maxSize = 0) _minSize = minSize; _maxSize = maxSize; + _negate = negate; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -52,7 +54,7 @@ public override bool Parse(ParseContext context, ref ParseResult resul for (var i = 0; i < maxLength; i++) { - if (_map[span[i]] == null) + if (_map[span[i]] == null != _negate) { break; } diff --git a/src/Parlot/Fluent/Parsers.cs b/src/Parlot/Fluent/Parsers.cs index 7b7a5dba..f34b6432 100644 --- a/src/Parlot/Fluent/Parsers.cs +++ b/src/Parlot/Fluent/Parsers.cs @@ -224,25 +224,49 @@ public Parser Identifier(Func? extraStart = null, Func /// The instance to match against each char. - /// The minimum number of matches required. Defaults to 1. + /// The minimum number of chars required. Defaults to 1. /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. public Parser AnyOf(SearchValues searchValues, int minSize = 1, int maxSize = 0) => new SearchValuesCharLiteral(searchValues, minSize, maxSize); /// /// Builds a parser that matches a list of chars. /// - /// The set of char to match. - /// The minimum number of matches required. Defaults to 1. + /// The set of chars to match. + /// The minimum number of chars required. Defaults to 1. /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. public Parser AnyOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => new SearchValuesCharLiteral(values, minSize, maxSize); + + /// + /// Builds a parser that matches anything but a list of chars. + /// + /// The instance to ignore against each char. + /// The minimum number of chars required. Defaults to 1. + /// When the parser reaches the maximum number of chars it returns . Defaults to 0, i.e. no maximum size. + public Parser NoneOf(SearchValues searchValues, int minSize = 1, int maxSize = 0) => new SearchValuesCharLiteral(searchValues, minSize, maxSize, negate: true); + + /// + /// Builds a parser that matches anything but a list of chars. + /// + /// The set of chars not to match. + /// The minimum number of chars required. Defaults to 1. + /// When the parser reaches the maximum number of chars it returns . Defaults to 0, i.e. no maximum size. + public Parser NoneOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => new SearchValuesCharLiteral(values, minSize, maxSize, negate: true); #else /// /// Builds a parser that matches a list of chars. /// - /// The set of char to match. + /// The set of chars to match. /// The minimum number of matches required. Defaults to 1. /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. public Parser AnyOf(string values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize); + + /// + /// Builds a parser that matches anything but a list of chars. + /// + /// The set of chars not to match. + /// The minimum number of required chars. Defaults to 1. + /// When the parser reaches the maximum number of chars it returns . Defaults to 0, i.e. no maximum size. + public Parser NoneOf(string values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize, negate: true); #endif } diff --git a/src/Parlot/Fluent/SearchValuesCharLiteral.cs b/src/Parlot/Fluent/SearchValuesCharLiteral.cs index cbd00159..7f88b262 100644 --- a/src/Parlot/Fluent/SearchValuesCharLiteral.cs +++ b/src/Parlot/Fluent/SearchValuesCharLiteral.cs @@ -10,6 +10,7 @@ internal sealed class SearchValuesCharLiteral : Parser, ISeekable private readonly SearchValues _searchValues; private readonly int _minSize; private readonly int _maxSize; + private readonly bool _negate; public bool CanSeek { get; } @@ -17,18 +18,20 @@ internal sealed class SearchValuesCharLiteral : Parser, ISeekable public bool SkipWhitespace { get; } - public SearchValuesCharLiteral(SearchValues searchValues, int minSize = 1, int maxSize = 0) + public SearchValuesCharLiteral(SearchValues searchValues, int minSize = 1, int maxSize = 0, bool negate = false) { _searchValues = searchValues ?? throw new ArgumentNullException(nameof(searchValues)); _minSize = minSize; _maxSize = maxSize; + _negate = negate; } - public SearchValuesCharLiteral(ReadOnlySpan searchValues, int minSize = 1, int maxSize = 0) + public SearchValuesCharLiteral(ReadOnlySpan searchValues, int minSize = 1, int maxSize = 0, bool negate = false) { _searchValues = SearchValues.Create(searchValues); _minSize = minSize; _maxSize = maxSize; + _negate = negate; if (minSize > 0) { @@ -49,7 +52,7 @@ public override bool Parse(ParseContext context, ref ParseResult resul } // First char not matching the searched values - var index = span.IndexOfAnyExcept(_searchValues); + var index = _negate ? span.IndexOfAny(_searchValues) : span.IndexOfAnyExcept(_searchValues); var size = 0; diff --git a/test/Parlot.Tests/FluentTests.cs b/test/Parlot.Tests/FluentTests.cs index 8a00e3bc..d8666e19 100644 --- a/test/Parlot.Tests/FluentTests.cs +++ b/test/Parlot.Tests/FluentTests.cs @@ -1143,7 +1143,7 @@ public void NumberShouldNotParseOverflow(string source) [InlineData("ba", "ab", "ab")] [InlineData("abc", "aaabbbccc", "aaabbbccc")] [InlineData("a", "aaab", "aaa")] - [InlineData("aa", "aaaab", "aaaa")] + [InlineData("aa", "aaaaab", "aaaaa")] public void AnyOfShouldMatch(string chars, string source, string expected) { Assert.Equal(expected, Literals.AnyOf(chars).Parse(source).ToString()); @@ -1153,7 +1153,7 @@ public void AnyOfShouldMatch(string chars, string source, string expected) [InlineData("a", "b")] [InlineData("a", "bbb")] [InlineData("abc", "dabc")] - public void AnyOfShouldNotMAtch(string chars, string source) + public void AnyOfShouldNotMatch(string chars, string source) { Assert.False(Literals.AnyOf(chars).TryParse(source, out var _)); } @@ -1192,6 +1192,63 @@ public void AnyOfShouldResetPositionWhenFalse() .TryParse("aaaZZ", out _)); } + [Theory] + [InlineData("a", "b", "b")] + [InlineData("a", "bb", "bb")] + [InlineData("a", "bbbb", "bbbb")] + [InlineData("ab", "cd", "cd")] + [InlineData("ba", "cd", "cd")] + [InlineData("abc", "dddeeefff", "dddeeefff")] + [InlineData("a", "bbba", "bbb")] + [InlineData("aa", "bbbbba", "bbbbb")] + public void NoneOfShouldMatch(string chars, string source, string expected) + { + Assert.Equal(expected, Literals.NoneOf(chars).Parse(source).ToString()); + } + + [Theory] + [InlineData("a", "a")] + [InlineData("a", "aaa")] + [InlineData("abc", "beee")] + public void NoneOfShouldNotMatch(string chars, string source) + { + Assert.False(Literals.NoneOf(chars).TryParse(source, out var _)); + } + + [Fact] + public void NoneOfShouldRespectSizeConstraints() + { + Assert.True(Literals.NoneOf("a", minSize: 0).TryParse("bbb", out var r) && r.ToString() == "bbb"); + Assert.True(Literals.NoneOf("a", minSize: 0).TryParse("aaa", out _)); + Assert.False(Literals.NoneOf("a", minSize: 4).TryParse("bbb", out _)); + Assert.False(Literals.NoneOf("a", minSize: 2).TryParse("ba", out _)); + Assert.False(Literals.NoneOf("a", minSize: 3).TryParse("ba", out _)); + Assert.Equal("bb", Literals.NoneOf("a", minSize: 2, maxSize: 2).Parse("bb")); + Assert.Equal("bb", Literals.NoneOf("a", minSize: 2, maxSize: 3).Parse("bb")); + Assert.Equal("b", Literals.NoneOf("a", maxSize: 1).Parse("bb")); + Assert.Equal("bbbb", Literals.NoneOf("a", minSize: 2, maxSize: 4).Parse("bbbbb")); + Assert.False(Literals.NoneOf("a", minSize: 2, maxSize: 2).TryParse("b", out _)); + } + + [Fact] + public void NoneOfShouldNotBeSeekableIfOptional() + { + var parser = Literals.NoneOf("a", minSize: 0) as ISeekable; + Assert.False(parser.CanSeek); + } + + [Fact] + public void NoneOfShouldResetPositionWhenFalse() + { + Assert.False(Literals.NoneOf("Z", minSize: 3) + .And(Literals.NoneOf("a")) + .TryParse("aaZZ", out _)); + + Assert.True(Literals.NoneOf("Z", minSize: 3) + .And(Literals.NoneOf("a")) + .TryParse("aaaZZ", out _)); + } + [Fact] public void ElseErrorShouldNotBeSeekable() {