From b0e598a81dd79e58eab47dd984d44925ac981b92 Mon Sep 17 00:00:00 2001 From: obligaron Date: Mon, 2 Dec 2024 22:03:11 +0100 Subject: [PATCH 1/9] [.NET] Add benchmarks --- .../Gherkin.Benchmarks.csproj | 20 +++++++++ dotnet/Gherkin.Benchmarks/GherkingParser.cs | 43 +++++++++++++++++++ dotnet/Gherkin.Benchmarks/Program.cs | 24 +++++++++++ dotnet/Gherkin.Benchmarks/TestFileProvider.cs | 15 +++++++ dotnet/Gherkin.sln | 6 +++ 5 files changed, 108 insertions(+) create mode 100644 dotnet/Gherkin.Benchmarks/Gherkin.Benchmarks.csproj create mode 100644 dotnet/Gherkin.Benchmarks/GherkingParser.cs create mode 100644 dotnet/Gherkin.Benchmarks/Program.cs create mode 100644 dotnet/Gherkin.Benchmarks/TestFileProvider.cs diff --git a/dotnet/Gherkin.Benchmarks/Gherkin.Benchmarks.csproj b/dotnet/Gherkin.Benchmarks/Gherkin.Benchmarks.csproj new file mode 100644 index 000000000..5fcb4ce5e --- /dev/null +++ b/dotnet/Gherkin.Benchmarks/Gherkin.Benchmarks.csproj @@ -0,0 +1,20 @@ + + + + Exe + net8.0;net481 + enable + enable + + + + + + + + + + + + + diff --git a/dotnet/Gherkin.Benchmarks/GherkingParser.cs b/dotnet/Gherkin.Benchmarks/GherkingParser.cs new file mode 100644 index 000000000..d8210d253 --- /dev/null +++ b/dotnet/Gherkin.Benchmarks/GherkingParser.cs @@ -0,0 +1,43 @@ +using BenchmarkDotNet.Attributes; +using Gherkin.Ast; +using System.Text; + +namespace Gherkin.Benchmarks; + +public class GherkingParser +{ + [Params("very_long.feature", "tags.feature")] + public string? FeatureFile { get; set; } + + readonly MemoryStream _TestData = new(); + readonly Parser _ParserReused = new(); + readonly TokenMatcher _TokenMatcher = new(); + StreamReader? 
_Reader; + + [GlobalSetup] + public void GlobalSetup() + { + var fullPathToTestFeatureFile = Path.Combine(TestFileProvider.GetTestFileFolder("good"), FeatureFile!); + + using var fileStream = new FileStream(fullPathToTestFeatureFile, FileMode.Open, FileAccess.Read); + + fileStream.CopyTo(_TestData); + + _Reader = new StreamReader(_TestData, Encoding.UTF8, false, 4096, true); + } + + [Benchmark] + public GherkinDocument Parser() + { + _TestData.Seek(0, SeekOrigin.Begin); + var parser = new Parser(); + return parser.Parse(new TokenScanner(_Reader)); + } + + [Benchmark] + public GherkinDocument ParserReuse() + { + _TestData.Seek(0, SeekOrigin.Begin); + return _ParserReused.Parse(new TokenScanner(_Reader), _TokenMatcher); + } +} diff --git a/dotnet/Gherkin.Benchmarks/Program.cs b/dotnet/Gherkin.Benchmarks/Program.cs new file mode 100644 index 000000000..d031e204d --- /dev/null +++ b/dotnet/Gherkin.Benchmarks/Program.cs @@ -0,0 +1,24 @@ +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; +using BenchmarkDotNet.Running; + +namespace Gherkin.Benchmarks; + +internal class Program +{ + static void Main(string[] args) + { +#if DEBUG + var config = new DebugInProcessConfig() +#else + var config = DefaultConfig.Instance + .AddJob(Job.Default.WithRuntime(CoreRuntime.Core80)) + .AddJob(Job.Default.WithRuntime(ClrRuntime.Net481)) +#endif + .AddDiagnoser(MemoryDiagnoser.Default) + ; + _ = BenchmarkRunner.Run(config); + } +} diff --git a/dotnet/Gherkin.Benchmarks/TestFileProvider.cs b/dotnet/Gherkin.Benchmarks/TestFileProvider.cs new file mode 100644 index 000000000..06a7a9586 --- /dev/null +++ b/dotnet/Gherkin.Benchmarks/TestFileProvider.cs @@ -0,0 +1,15 @@ +namespace Gherkin.Benchmarks; + +public class TestFileProvider +{ + public static string GetTestFileFolder(string category) + { + var inputFolder = Environment.CurrentDirectory; +#if DEBUG + // Artefacts are not created in subdirectories, so we 
don't need to go any higher. +#elif NET6_0_OR_GREATER + inputFolder = Path.Combine(inputFolder, "..", "..", "..", ".."); +#endif + return Path.GetFullPath(Path.Combine(inputFolder, "..", "..", "..", "..", "..", "testdata", category)); + } +} diff --git a/dotnet/Gherkin.sln b/dotnet/Gherkin.sln index 18c9edcaf..e1dfc4dcc 100644 --- a/dotnet/Gherkin.sln +++ b/dotnet/Gherkin.sln @@ -18,6 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Makefile = Makefile EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Gherkin.Benchmarks", "Gherkin.Benchmarks\Gherkin.Benchmarks.csproj", "{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -32,6 +34,10 @@ Global {A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Debug|Any CPU.Build.0 = Debug|Any CPU {A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.ActiveCfg = Release|Any CPU {A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.Build.0 = Release|Any CPU + {4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.Build.0 = Debug|Any CPU + {4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.ActiveCfg = Release|Any CPU + {4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From b0e449beab8864f241eb6b1713b9946a6a350b2c Mon Sep 17 00:00:00 2001 From: obligaron Date: Wed, 4 Dec 2024 21:09:51 +0100 Subject: [PATCH 2/9] [.NET] Gherkin.Parser: avoid delegate creation --- CHANGELOG.md | 1 + dotnet/Gherkin/Parser.cs | 205 ++++++++++++++++++++++++++++++------ dotnet/gherkin-csharp.razor | 64 +++++++---- 3 files changed, 216 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f549f80c..a1c6d7fcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This 
document is formatted according to the principles of [Keep A CHANGELOG](htt ## [Unreleased] ### Fixed - [c] slight update to existing CMakeFiles.txt to propagate VERSION. Close #320 ([#328](https://github.com/cucumber/gherkin/pull/328)) +- [.NET] Improved parsing time ### Changed - [cpp] add generic support for ABI versioning with VERSION ([#328](https://github.com/cucumber/gherkin/pull/328)) diff --git a/dotnet/Gherkin/Parser.cs b/dotnet/Gherkin/Parser.cs index 127f34f31..133c81e5f 100644 --- a/dotnet/Gherkin/Parser.cs +++ b/dotnet/Gherkin/Parser.cs @@ -140,47 +140,60 @@ private void AddError(ParserContext context, ParserException error) throw new CompositeParserException(context.Errors.ToArray()); } - private void HandleAstError(ParserContext context, Action action) - { - HandleExternalError(context, () => { action(); return true; }); - } - - private T HandleExternalError(ParserContext context, Func action, T defaultValue = default(T)) + private bool TryHandleExternalError(ParserContext context, Exception exception) { if (StopAtFirstError) - { - return action(); - } + return false; - try - { - return action(); - } - catch (CompositeParserException compositeParserException) + if (exception is CompositeParserException compositeParserException) { foreach (var error in compositeParserException.Errors) AddError(context, error); } - catch (ParserException error) + else if (exception is ParserException error) { AddError(context, error); } - return defaultValue; + return true; } void Build(ParserContext context, Token token) { - HandleAstError(context, () => this.astBuilder.Build(token)); + try + { + this.astBuilder.Build(token); + } + catch (Exception ex) + { + if (!TryHandleExternalError(context, ex)) + throw; + } } void StartRule(ParserContext context, RuleType ruleType) { - HandleAstError(context, () => this.astBuilder.StartRule(ruleType)); + try + { + this.astBuilder.StartRule(ruleType); + } + catch (Exception ex) + { + if (!TryHandleExternalError(context, ex)) + 
throw; + } } void EndRule(ParserContext context, RuleType ruleType) { - HandleAstError(context, () => this.astBuilder.EndRule(ruleType)); + try + { + this.astBuilder.EndRule(ruleType); + } + catch (Exception ex) + { + if (!TryHandleExternalError(context, ex)) + throw; + } } T GetResult(ParserContext context) @@ -196,72 +209,198 @@ Token ReadToken(ParserContext context) bool Match_EOF(ParserContext context, Token token) { - return HandleExternalError(context, () => context.TokenMatcher.Match_EOF(token), false); + try + { + return context.TokenMatcher.Match_EOF(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_Empty(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_Empty(token), false); + try + { + return context.TokenMatcher.Match_Empty(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_Comment(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_Comment(token), false); + try + { + return context.TokenMatcher.Match_Comment(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_TagLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_TagLine(token), false); + try + { + return context.TokenMatcher.Match_TagLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_FeatureLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_FeatureLine(token), false); + try + { + return 
context.TokenMatcher.Match_FeatureLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_RuleLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_RuleLine(token), false); + try + { + return context.TokenMatcher.Match_RuleLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_BackgroundLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_BackgroundLine(token), false); + try + { + return context.TokenMatcher.Match_BackgroundLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_ScenarioLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_ScenarioLine(token), false); + try + { + return context.TokenMatcher.Match_ScenarioLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_ExamplesLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_ExamplesLine(token), false); + try + { + return context.TokenMatcher.Match_ExamplesLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_StepLine(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_StepLine(token), false); + try + { + return context.TokenMatcher.Match_StepLine(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, 
exception)) + return false; + throw; + } } bool Match_DocStringSeparator(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_DocStringSeparator(token), false); + try + { + return context.TokenMatcher.Match_DocStringSeparator(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_TableRow(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_TableRow(token), false); + try + { + return context.TokenMatcher.Match_TableRow(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_Language(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_Language(token), false); + try + { + return context.TokenMatcher.Match_Language(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } bool Match_Other(ParserContext context, Token token) { if (token.IsEOF) return false; - return HandleExternalError(context, () => context.TokenMatcher.Match_Other(token), false); + try + { + return context.TokenMatcher.Match_Other(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } protected virtual int MatchToken(int state, Token token, ParserContext context) { diff --git a/dotnet/gherkin-csharp.razor b/dotnet/gherkin-csharp.razor index 28da67629..b08e0120c 100644 --- a/dotnet/gherkin-csharp.razor +++ b/dotnet/gherkin-csharp.razor @@ -1,4 +1,4 @@ -// ------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------ // // This code was generated 
by Berp (http://https://github.com/gasparnagy/berp/). // @@ -131,47 +131,60 @@ namespace @Model.Namespace throw new CompositeParserException(context.Errors.ToArray()); } - private void HandleAstError(ParserContext context, Action action) - { - HandleExternalError(context, () => { action(); return true; }); - } - - private T HandleExternalError(ParserContext context, Func action, T defaultValue = default(T)) + private bool TryHandleExternalError(ParserContext context, Exception exception) { if (StopAtFirstError) - { - return action(); - } + return false; - try - { - return action(); - } - catch (CompositeParserException compositeParserException) + if (exception is CompositeParserException compositeParserException) { foreach (var error in compositeParserException.Errors) AddError(context, error); } - catch (ParserException error) + else if (exception is ParserException error) { AddError(context, error); } - return defaultValue; + return true; } void Build(ParserContext context, Token token) { - HandleAstError(context, () => this.astBuilder.Build(token)); + try + { + this.astBuilder.Build(token); + } + catch (Exception ex) + { + if (!TryHandleExternalError(context, ex)) + throw; + } } void StartRule(ParserContext context, RuleType ruleType) { - HandleAstError(context, () => this.astBuilder.StartRule(ruleType)); + try + { + this.astBuilder.StartRule(ruleType); + } + catch (Exception ex) + { + if (!TryHandleExternalError(context, ex)) + throw; + } } void EndRule(ParserContext context, RuleType ruleType) { - HandleAstError(context, () => this.astBuilder.EndRule(ruleType)); + try + { + this.astBuilder.EndRule(ruleType); + } + catch (Exception ex) + { + if (!TryHandleExternalError(context, ex)) + throw; + } } T GetResult(ParserContext context) @@ -192,7 +205,16 @@ namespace @Model.Namespace { @:if (token.IsEOF) return false; } - return HandleExternalError(context, () => context.TokenMatcher.Match_@(rule.Name.Replace("#", ""))(token), false); + try + { + return 
context.TokenMatcher.Match_@(rule.Name.Replace("#", ""))(token); + } + catch (Exception exception) + { + if (TryHandleExternalError(context, exception)) + return false; + throw; + } } } From 3b6382c4a2d2c5df951842ba680857229a6e3231 Mon Sep 17 00:00:00 2001 From: obligaron Date: Wed, 4 Dec 2024 21:45:57 +0100 Subject: [PATCH 3/9] [.NET] GherkinLine.SplitCells: Avoid string allocations --- dotnet/Gherkin/GherkinLanguageConstants.cs | 1 + dotnet/Gherkin/GherkinLine.cs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dotnet/Gherkin/GherkinLanguageConstants.cs b/dotnet/Gherkin/GherkinLanguageConstants.cs index d1336331f..4a4c742e5 100644 --- a/dotnet/Gherkin/GherkinLanguageConstants.cs +++ b/dotnet/Gherkin/GherkinLanguageConstants.cs @@ -6,6 +6,7 @@ public static class GherkinLanguageConstants public const string COMMENT_PREFIX = "#"; public const string TITLE_KEYWORD_SEPARATOR = ":"; public const string TABLE_CELL_SEPARATOR = "|"; + public const char TABLE_CELL_SEPARATOR_CHAR = '|'; public const char TABLE_CELL_ESCAPE_CHAR = '\\'; public const char TABLE_CELL_NEWLINE_ESCAPE = 'n'; public const string DOCSTRING_SEPARATOR = "\"\"\""; diff --git a/dotnet/Gherkin/GherkinLine.cs b/dotnet/Gherkin/GherkinLine.cs index 46f4f3afe..fb05507b2 100644 --- a/dotnet/Gherkin/GherkinLine.cs +++ b/dotnet/Gherkin/GherkinLine.cs @@ -118,7 +118,7 @@ private IEnumerable> SplitCells(string row) { pos++; char c = rowEnum.Current; - if (c.ToString() == GherkinLanguageConstants.TABLE_CELL_SEPARATOR) + if (c == GherkinLanguageConstants.TABLE_CELL_SEPARATOR_CHAR) { yield return Tuple.Create(cell, startPos); cell = ""; @@ -136,7 +136,7 @@ private IEnumerable> SplitCells(string row) } else { - if (c.ToString() != GherkinLanguageConstants.TABLE_CELL_SEPARATOR && c != GherkinLanguageConstants.TABLE_CELL_ESCAPE_CHAR) + if (c != GherkinLanguageConstants.TABLE_CELL_SEPARATOR_CHAR && c != GherkinLanguageConstants.TABLE_CELL_ESCAPE_CHAR) { cell += 
GherkinLanguageConstants.TABLE_CELL_ESCAPE_CHAR; } From ec6feef9f72c07016dc4b4168621ca53e5707806 Mon Sep 17 00:00:00 2001 From: obligaron Date: Wed, 4 Dec 2024 23:03:25 +0100 Subject: [PATCH 4/9] [.NET] Optimize AstNode.subItems handling --- dotnet/Gherkin/AstNode.cs | 61 ++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/dotnet/Gherkin/AstNode.cs b/dotnet/Gherkin/AstNode.cs index 42395401b..8c0fbc108 100644 --- a/dotnet/Gherkin/AstNode.cs +++ b/dotnet/Gherkin/AstNode.cs @@ -2,7 +2,7 @@ namespace Gherkin; public class AstNode(RuleType ruleType) { - private readonly Dictionary> subItems = new Dictionary>(); + private readonly Dictionary subItems = new Dictionary(); public RuleType RuleType { get; } = ruleType; @@ -18,17 +18,50 @@ public IEnumerable GetTokens(TokenType tokenType) public T GetSingle(RuleType ruleType) { - return GetItems(ruleType).SingleOrDefault(); + if (!subItems.TryGetValue(ruleType, out var items)) + return default; + if (items is List list) + { + T ret = default; + bool foundOne = false; + foreach (var item in list) + { + if (item is T tItem) + { + if (foundOne) + throw new InvalidOperationException(); + ret = tItem; + foundOne = true; + } + } + if (foundOne) + return ret; + else + throw new InvalidOperationException(); + } + else if (items is T tItem) + { + return tItem; + } + return default; } public IEnumerable GetItems(RuleType ruleType) { - IList items; - if (!subItems.TryGetValue(ruleType, out items)) + if (!subItems.TryGetValue(ruleType, out var items)) + yield break; + if (items is List list) + { + foreach (var item in list) + { + if (item is T tItem) + yield return tItem; + } + } + else if (items is T tItem) { - return Enumerable.Empty(); + yield return tItem; } - return items.Cast(); } public void SetSingle(RuleType ruleType, T value) @@ -46,12 +79,18 @@ public void AddRange(RuleType ruleType, IEnumerable values) public void Add(RuleType ruleType, T obj) { - IList items; - if 
(!subItems.TryGetValue(ruleType, out items)) + if (!subItems.TryGetValue(ruleType, out var items)) + { + subItems.Add(ruleType, obj); + } + else if (items is List list) + { + list.Add(obj); + } + else { - items = new List(); - subItems.Add(ruleType, items); + list = [items, obj]; + subItems[ruleType] = list; } - items.Add(obj); } } From 6e9a59a1eb47559e8e8adca46836c5a5aeea2430 Mon Sep 17 00:00:00 2001 From: obligaron Date: Fri, 6 Dec 2024 22:20:35 +0100 Subject: [PATCH 5/9] [.NET] Remove old Mono workaround and use string.StartsWith with ordinal everywhere --- CHANGELOG.md | 1 + dotnet/Gherkin/GherkinLine.cs | 2 +- dotnet/Gherkin/StringUtils.cs | 5 +---- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1c6d7fcb..0e9508c45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt ### Fixed - [c] slight update to existing CMakeFiles.txt to propagate VERSION. 
Close #320 ([#328](https://github.com/cucumber/gherkin/pull/328)) - [.NET] Improved parsing time +- [.NET] Use string-ordinal comparison consistently and remove old Mono workaround ### Changed - [cpp] add generic support for ABI versioning with VERSION ([#328](https://github.com/cucumber/gherkin/pull/328)) diff --git a/dotnet/Gherkin/GherkinLine.cs b/dotnet/Gherkin/GherkinLine.cs index fb05507b2..45707bfa0 100644 --- a/dotnet/Gherkin/GherkinLine.cs +++ b/dotnet/Gherkin/GherkinLine.cs @@ -36,7 +36,7 @@ public bool IsEmpty() public bool StartsWith(string text) { - return trimmedLineText.StartsWith(text); + return trimmedLineText.StartsWith(text, StringComparison.Ordinal); } public bool StartsWithTitleKeyword(string text) diff --git a/dotnet/Gherkin/StringUtils.cs b/dotnet/Gherkin/StringUtils.cs index 22ae0e6a5..e23714e93 100644 --- a/dotnet/Gherkin/StringUtils.cs +++ b/dotnet/Gherkin/StringUtils.cs @@ -4,12 +4,9 @@ namespace Gherkin; public class StringUtils { - // string.StartsWith(string) is broken on Mono for strings outside - // the Basic Multilingual Plane (BMP). We have to roll our own so - // it works with Emoji characters. 
public static bool StartsWith(string a, string b) { - return StartsWith(a.ToCharArray(), b.ToCharArray()); + return a.StartsWith(b, StringComparison.Ordinal); } private static bool StartsWith(char[] a, char[] b) From d3b06c5109f749193676d6d5b9b6e8d311433be5 Mon Sep 17 00:00:00 2001 From: obligaron Date: Fri, 6 Dec 2024 22:59:20 +0100 Subject: [PATCH 6/9] [.NET] Gherkinline.GetTableCells: avoid string concatenation --- dotnet/Gherkin/GherkinLine.cs | 61 +++++++++++++++-------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/dotnet/Gherkin/GherkinLine.cs b/dotnet/Gherkin/GherkinLine.cs index 45707bfa0..3a3f2d4d4 100644 --- a/dotnet/Gherkin/GherkinLine.cs +++ b/dotnet/Gherkin/GherkinLine.cs @@ -87,45 +87,45 @@ public IEnumerable GetTags() public IEnumerable GetTableCells() { - var items = SplitCells(trimmedLineText).ToList(); - bool isBeforeFirst = true; - foreach (var item in items.Take(items.Count - 1)) // skipping the one after last - { - if (!isBeforeFirst) - { - int trimmedStart; - var cellText = Trim(item.Item1, out trimmedStart); - var cellPosition = item.Item2 + trimmedStart; + var rowEnum = trimmedLineText.GetEnumerator(); + bool isFirstRow = true; - if (cellText.Length == 0) - cellPosition = item.Item2; + string cell = null; + int pos = 0; + int startPos = 0; - yield return new GherkinLineSpan(Indent + cellPosition + 1, cellText); - } + static void EnsureCellText(ref string cell, string trimmedLineText, ref int startPos, int pos) + { + if (cell is not null) + return; - isBeforeFirst = false; - } - } + while (startPos < pos && Array.IndexOf(inlineWhitespaceChars, trimmedLineText[startPos]) != -1) + startPos++; - private IEnumerable> SplitCells(string row) - { - var rowEnum = row.GetEnumerator(); + cell = trimmedLineText.Substring(startPos, pos - startPos - 1); + } - string cell = ""; - int pos = 0; - int startPos = 0; while (rowEnum.MoveNext()) { pos++; char c = rowEnum.Current; if (c == 
GherkinLanguageConstants.TABLE_CELL_SEPARATOR_CHAR) { - yield return Tuple.Create(cell, startPos); - cell = ""; + if (isFirstRow) + isFirstRow = false; + else + { + EnsureCellText(ref cell, trimmedLineText, ref startPos, pos); + var cellText = cell.TrimEnd(inlineWhitespaceChars); + + yield return new GherkinLineSpan(Indent + startPos + 1, cellText); + } + cell = null; startPos = pos; } else if (c == GherkinLanguageConstants.TABLE_CELL_ESCAPE_CHAR) { + EnsureCellText(ref cell, trimmedLineText, ref startPos, pos); if (rowEnum.MoveNext()) { pos++; @@ -150,18 +150,9 @@ private IEnumerable> SplitCells(string row) } else { - cell += c; + if (cell is not null) + cell += c; } } - yield return Tuple.Create(cell, startPos); - } - - private string Trim(string s, out int trimmedStart) - { - trimmedStart = 0; - while (trimmedStart < s.Length && inlineWhitespaceChars.Contains(s[trimmedStart])) - trimmedStart++; - - return s.Trim(inlineWhitespaceChars); } } From 81d5822c5a480eaaf4b681b53892d9f0e8f80c3d Mon Sep 17 00:00:00 2001 From: obligaron Date: Fri, 6 Dec 2024 23:11:59 +0100 Subject: [PATCH 7/9] [.NET] AstBuilder.GetCells: avoid unnecessary enumerator and array collections --- dotnet/Gherkin/AstBuilder.cs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dotnet/Gherkin/AstBuilder.cs b/dotnet/Gherkin/AstBuilder.cs index 433af8ad9..81f3f3318 100644 --- a/dotnet/Gherkin/AstBuilder.cs +++ b/dotnet/Gherkin/AstBuilder.cs @@ -279,8 +279,9 @@ protected virtual void CheckCellCountConsistency(TableRow[] rows) return; int cellCount = rows[0].Cells.Count(); - foreach (var row in rows) + for (int i = 1; i < rows.Length; i++) { + var row = rows[i]; if (row.Cells.Count() != cellCount) { HandleAstError("inconsistent cell count within the table", row.Location); @@ -295,9 +296,13 @@ protected virtual void HandleAstError(string message, Location location) private TableCell[] GetCells(Token tableRowToken) { - return tableRowToken.MatchedItems - .Select(cellItem => 
CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text)) - .ToArray(); + var cells = new TableCell[tableRowToken.MatchedItems.Length]; + for (int i = 0; i < cells.Length; i++) + { + var cellItem = tableRowToken.MatchedItems[i]; + cells[i] = CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text); + } + return cells; } private static Step[] GetSteps(AstNode scenarioDefinitionNode) From 91380afd90de4e8707ce67926d91101b1edd1683 Mon Sep 17 00:00:00 2001 From: obligaron Date: Sat, 7 Dec 2024 00:42:28 +0100 Subject: [PATCH 8/9] [.NET] GherkinDialectProvider.ParseJsonContent: Use SourceCodeGenerator for System.Text.Json --- dotnet/Gherkin/GherkinDialectProvider.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dotnet/Gherkin/GherkinDialectProvider.cs b/dotnet/Gherkin/GherkinDialectProvider.cs index ac3e17c57..67eef4125 100644 --- a/dotnet/Gherkin/GherkinDialectProvider.cs +++ b/dotnet/Gherkin/GherkinDialectProvider.cs @@ -1,5 +1,6 @@ using Gherkin.Ast; using System.Text.Json; +using System.Text.Json.Serialization; namespace Gherkin; @@ -52,7 +53,7 @@ protected virtual Dictionary LoadLanguageSetting protected virtual Dictionary ParseJsonContent(string languagesFileContent) { - return JsonSerializer.Deserialize>(languagesFileContent, new JsonSerializerOptions(JsonSerializerDefaults.Web)); + return JsonSerializer.Deserialize>(languagesFileContent, new JsonSerializerOptions(JsonSerializerDefaults.Web) { TypeInfoResolver = SourceGenerationContext.Default }); } protected virtual bool TryGetDialect(string language, Dictionary gherkinLanguageSettings, Location location, out GherkinDialect dialect) @@ -113,6 +114,12 @@ protected static GherkinDialect GetFactoryDefault() } } +[JsonSourceGenerationOptions] +[JsonSerializable(typeof(Dictionary))] +internal partial class SourceGenerationContext : JsonSerializerContext +{ +} + public class GherkinLanguageSetting { public string Name { get; set; } From 
4b25f1ccffc0438c307f28ae406f615c6f91a844 Mon Sep 17 00:00:00 2001 From: obligaron Date: Sat, 7 Dec 2024 18:15:20 +0100 Subject: [PATCH 9/9] [.NET] GherkinLine.GetTags: avoid calling RegEx --- dotnet/Gherkin/GherkinLine.cs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/dotnet/Gherkin/GherkinLine.cs b/dotnet/Gherkin/GherkinLine.cs index 3a3f2d4d4..51d7ec00a 100644 --- a/dotnet/Gherkin/GherkinLine.cs +++ b/dotnet/Gherkin/GherkinLine.cs @@ -1,5 +1,4 @@ using Gherkin.Ast; -using System.Text.RegularExpressions; namespace Gherkin; @@ -65,7 +64,19 @@ public string GetRestTrimmed(int length) public IEnumerable GetTags() { - var uncommentedLine = Regex.Split(trimmedLineText, @"\s" + GherkinLanguageConstants.COMMENT_PREFIX)[0]; + string uncommentedLine = trimmedLineText; + var commentIndex = trimmedLineText.IndexOf(GherkinLanguageConstants.COMMENT_PREFIX[0]); + while (commentIndex >= 0) + { + if (commentIndex == 0) + yield break; + if (Array.IndexOf(inlineWhitespaceChars, trimmedLineText[commentIndex - 1]) != -1) /* a '#' starts a comment only if the char before it is in inlineWhitespaceChars (IndexOf is negative when absent) */ + { + uncommentedLine = uncommentedLine.Substring(0, commentIndex); + break; + } + commentIndex = trimmedLineText.IndexOf(GherkinLanguageConstants.COMMENT_PREFIX[0], commentIndex + 1); + } int position = Indent; foreach (string item in uncommentedLine.Split(GherkinLanguageConstants.TAG_PREFIX[0])) {