Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[.NET] Improved parsing time #336

Merged
merged 9 commits into from
Dec 18, 2024
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt
## [Unreleased]
### Fixed
- [c] slight update to existing CMakeFiles.txt to propagate VERSION. Close #320 ([#328](https://github.com/cucumber/gherkin/pull/328))
- [.NET] Improved parsing time ([#336](https://github.com/cucumber/gherkin/pull/336))
- [.NET] Use string-ordinal comparison consistently and remove old Mono workaround

### Changed
- [cpp] add generic support for ABI versioning with VERSION ([#328](https://github.com/cucumber/gherkin/pull/328))
Expand Down
20 changes: 20 additions & 0 deletions dotnet/Gherkin.Benchmarks/Gherkin.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Executable project for the Gherkin benchmarks; built for both net8.0 and net481. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net8.0;net481</TargetFrameworks>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Benchmark harness. -->
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<!-- NOTE(review): the two explicit references below are presumably version pins to avoid transitive package conflicts; confirm before removing. -->
<PackageReference Include="System.Collections.Immutable" Version="8.0.0" />
<PackageReference Include="System.Reflection.Metadata" Version="8.0.1" />
</ItemGroup>

<ItemGroup>
<!-- The library whose parser is being measured. -->
<ProjectReference Include="..\Gherkin\Gherkin.csproj" />
</ItemGroup>

</Project>
43 changes: 43 additions & 0 deletions dotnet/Gherkin.Benchmarks/GherkingParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using BenchmarkDotNet.Attributes;
using Gherkin.Ast;
using System.Text;

namespace Gherkin.Benchmarks;

/// <summary>
/// Benchmarks parsing of a Gherkin feature file that has been copied into memory,
/// once with a freshly created parser per invocation and once with a reused
/// parser and token matcher.
/// </summary>
public class GherkingParser
{
    /// <summary>
    /// File name of the feature file to parse, resolved against the "good" test data folder.
    /// </summary>
    [Params("very_long.feature", "tags.feature")]
    public string? FeatureFile { get; set; }

    // In-memory copy of the feature file so that disk I/O is not part of the measurement.
    readonly MemoryStream _TestData = new();
    readonly Parser _ParserReused = new();
    readonly TokenMatcher _TokenMatcher = new();

    // Reader over _TestData; created in GlobalSetup and rewound before every invocation.
    StreamReader? _Reader;

    /// <summary>
    /// Loads the selected feature file into <c>_TestData</c> and creates the shared reader.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var fullPathToTestFeatureFile = Path.Combine(TestFileProvider.GetTestFileFolder("good"), FeatureFile!);

        using var fileStream = new FileStream(fullPathToTestFeatureFile, FileMode.Open, FileAccess.Read);

        fileStream.CopyTo(_TestData);

        // leaveOpen: true - the reader must not close the memory stream it wraps.
        _Reader = new StreamReader(_TestData, Encoding.UTF8, false, 4096, true);
    }

    /// <summary>Parses the feature file with a new parser instance.</summary>
    /// <returns>The parsed document.</returns>
    [Benchmark]
    public GherkinDocument Parser()
    {
        var parser = new Parser();
        return parser.Parse(new TokenScanner(RewindReader()));
    }

    /// <summary>Parses the feature file with the shared parser and token matcher.</summary>
    /// <returns>The parsed document.</returns>
    [Benchmark]
    public GherkinDocument ParserReuse()
    {
        return _ParserReused.Parse(new TokenScanner(RewindReader()), _TokenMatcher);
    }

    /// <summary>
    /// Positions the in-memory stream at its start and resets the reader's internal state
    /// so the next parse reads the data from the beginning.
    /// </summary>
    /// <returns>The shared reader, ready to read from the start of the data.</returns>
    /// <exception cref="InvalidOperationException">The global setup has not run yet.</exception>
    private StreamReader RewindReader()
    {
        var reader = _Reader
            ?? throw new InvalidOperationException("GlobalSetup must run before the benchmarks.");

        _TestData.Seek(0, SeekOrigin.Begin);

        // The reader caches bytes/chars of the underlying stream; after repositioning the
        // stream that cache must be dropped, otherwise stale data could be returned.
        reader.DiscardBufferedData();

        return reader;
    }
}
24 changes: 24 additions & 0 deletions dotnet/Gherkin.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;

namespace Gherkin.Benchmarks;

/// <summary>
/// Entry point of the benchmark executable.
/// </summary>
internal class Program
{
    /// <summary>
    /// Builds the BenchmarkDotNet configuration and runs the <c>GherkingParser</c> benchmarks.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Debug builds use the in-process debug configuration; other builds run one job
        // on .NET 8.0 and one on .NET Framework 4.8.1.
#if DEBUG
        IConfig baseConfig = new DebugInProcessConfig();
#else
        IConfig baseConfig = DefaultConfig.Instance
            .AddJob(Job.Default.WithRuntime(CoreRuntime.Core80))
            .AddJob(Job.Default.WithRuntime(ClrRuntime.Net481));
#endif

        // Memory statistics are collected in every build flavour.
        var configWithMemoryStats = baseConfig.AddDiagnoser(MemoryDiagnoser.Default);

        _ = BenchmarkRunner.Run<GherkingParser>(configWithMemoryStats);
    }
}
15 changes: 15 additions & 0 deletions dotnet/Gherkin.Benchmarks/TestFileProvider.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace Gherkin.Benchmarks;

/// <summary>
/// Locates the shared <c>testdata</c> folders relative to the current working directory.
/// </summary>
public class TestFileProvider
{
    /// <summary>
    /// Returns the absolute path of the <c>testdata/&lt;category&gt;</c> folder.
    /// </summary>
    /// <param name="category">Name of the sub-folder below <c>testdata</c>, e.g. "good".</param>
    /// <returns>The normalized, absolute folder path.</returns>
    public static string GetTestFileFolder(string category)
    {
#if DEBUG
        // Artefacts are not created in subdirectories, so we don't need to go any higher.
        var startFolder = Environment.CurrentDirectory;
#elif NET6_0_OR_GREATER
        // NOTE(review): presumably compensates for a deeper working directory in this build flavour - confirm.
        var startFolder = Path.Combine(Environment.CurrentDirectory, "..", "..", "..", "..");
#else
        var startFolder = Environment.CurrentDirectory;
#endif
        var relativeTestData = Path.Combine(startFolder, "..", "..", "..", "..", "..", "testdata", category);
        return Path.GetFullPath(relativeTestData);
    }
}
6 changes: 6 additions & 0 deletions dotnet/Gherkin.sln
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
Makefile = Makefile
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Gherkin.Benchmarks", "Gherkin.Benchmarks\Gherkin.Benchmarks.csproj", "{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -32,6 +34,10 @@ Global
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.Build.0 = Release|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
13 changes: 9 additions & 4 deletions dotnet/Gherkin/AstBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,9 @@ protected virtual void CheckCellCountConsistency(TableRow[] rows)
return;

int cellCount = rows[0].Cells.Count();
foreach (var row in rows)
for (int i = 1; i < rows.Length; i++)
{
var row = rows[i];
if (row.Cells.Count() != cellCount)
{
HandleAstError("inconsistent cell count within the table", row.Location);
Expand All @@ -295,9 +296,13 @@ protected virtual void HandleAstError(string message, Location location)

private TableCell[] GetCells(Token tableRowToken)
{
return tableRowToken.MatchedItems
.Select(cellItem => CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text))
.ToArray();
var cells = new TableCell[tableRowToken.MatchedItems.Length];
for (int i = 0; i < cells.Length; i++)
{
var cellItem = tableRowToken.MatchedItems[i];
cells[i] = CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text);
}
return cells;
}

private static Step[] GetSteps(AstNode scenarioDefinitionNode)
Expand Down
61 changes: 50 additions & 11 deletions dotnet/Gherkin/AstNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ namespace Gherkin;

public class AstNode(RuleType ruleType)
{
private readonly Dictionary<RuleType, IList<object>> subItems = new Dictionary<RuleType, IList<object>>();
private readonly Dictionary<RuleType, object> subItems = new Dictionary<RuleType, object>();

public RuleType RuleType { get; } = ruleType;

Expand All @@ -18,17 +18,50 @@ public IEnumerable<Token> GetTokens(TokenType tokenType)

public T GetSingle<T>(RuleType ruleType)
{
return GetItems<T>(ruleType).SingleOrDefault();
if (!subItems.TryGetValue(ruleType, out var items))
return default;
if (items is List<object> list)
{
T ret = default;
bool foundOne = false;
foreach (var item in list)
{
if (item is T tItem)
{
if (foundOne)
throw new InvalidOperationException();
ret = tItem;
foundOne = true;
}
}
if (foundOne)
return ret;
else
throw new InvalidOperationException();
}
else if (items is T tItem)
{
return tItem;
}
return default;
}

public IEnumerable<T> GetItems<T>(RuleType ruleType)
{
IList<object> items;
if (!subItems.TryGetValue(ruleType, out items))
if (!subItems.TryGetValue(ruleType, out var items))
yield break;
if (items is List<object> list)
{
foreach (var item in list)
{
if (item is T tItem)
yield return tItem;
}
}
else if (items is T tItem)
{
return Enumerable.Empty<T>();
yield return tItem;
}
return items.Cast<T>();
}

public void SetSingle<T>(RuleType ruleType, T value)
Expand All @@ -46,12 +79,18 @@ public void AddRange<T>(RuleType ruleType, IEnumerable<T> values)

public void Add<T>(RuleType ruleType, T obj)
{
IList<object> items;
if (!subItems.TryGetValue(ruleType, out items))
if (!subItems.TryGetValue(ruleType, out var items))
{
subItems.Add(ruleType, obj);
}
else if (items is List<object> list)
{
list.Add(obj);
}
else
{
items = new List<object>();
subItems.Add(ruleType, items);
list = [items, obj];
subItems[ruleType] = list;
}
items.Add(obj);
}
}
9 changes: 8 additions & 1 deletion dotnet/Gherkin/GherkinDialectProvider.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Gherkin.Ast;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace Gherkin;

Expand Down Expand Up @@ -52,7 +53,7 @@ protected virtual Dictionary<string, GherkinLanguageSetting> LoadLanguageSetting

protected virtual Dictionary<string, GherkinLanguageSetting> ParseJsonContent(string languagesFileContent)
{
return JsonSerializer.Deserialize<Dictionary<string, GherkinLanguageSetting>>(languagesFileContent, new JsonSerializerOptions(JsonSerializerDefaults.Web));
return JsonSerializer.Deserialize<Dictionary<string, GherkinLanguageSetting>>(languagesFileContent, new JsonSerializerOptions(JsonSerializerDefaults.Web) { TypeInfoResolver = SourceGenerationContext.Default });
}

protected virtual bool TryGetDialect(string language, Dictionary<string, GherkinLanguageSetting> gherkinLanguageSettings, Location location, out GherkinDialect dialect)
Expand Down Expand Up @@ -113,6 +114,12 @@ protected static GherkinDialect GetFactoryDefault()
}
}

/// <summary>
/// System.Text.Json source-generation context that registers
/// <c>Dictionary&lt;string, GherkinLanguageSetting&gt;</c> as a serializable type.
/// Its <c>Default</c> instance is passed as the <c>TypeInfoResolver</c> when
/// <c>ParseJsonContent</c> deserializes the languages file content.
/// </summary>
[JsonSourceGenerationOptions]
[JsonSerializable(typeof(Dictionary<string, GherkinLanguageSetting>))]
internal partial class SourceGenerationContext : JsonSerializerContext
{
}

public class GherkinLanguageSetting
{
public string Name { get; set; }
Expand Down
1 change: 1 addition & 0 deletions dotnet/Gherkin/GherkinLanguageConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ public static class GherkinLanguageConstants
public const string COMMENT_PREFIX = "#";
public const string TITLE_KEYWORD_SEPARATOR = ":";
public const string TABLE_CELL_SEPARATOR = "|";
public const char TABLE_CELL_SEPARATOR_CHAR = '|';
public const char TABLE_CELL_ESCAPE_CHAR = '\\';
public const char TABLE_CELL_NEWLINE_ESCAPE = 'n';
public const string DOCSTRING_SEPARATOR = "\"\"\"";
Expand Down
Loading
Loading