-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Description
Hi, after the migration from netfx to netcore we observed a performance degradation in places with intensive usage of Expression.
After profiling and research we found a reason of this, it was Expression.Lambda<TDelegate> call.
I assume that this PR is the reason of this performance degradation.
To summarize, before this changes we called the generic constructor. After the changes firstly we search in the cache (cache size is 50) if we have cache-miss we will use reflection and will replace one row in the cache.
I think the cache isn't necessary for generic Expression.Lambda<TDelegate> method.
I have prepared and built my own runtime fork with a fix and also did a benchmark for a cache-miss situation.
Benchmark:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Text.RegularExpressions;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnostics.Windows.Configs;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Order;
using BenchmarkDotNet.Running;
using BenchmarkDotNet.Toolchains.CoreRun;
using BenchmarkDotNet.Toolchains.CsProj;
using BenchmarkDotNet.Toolchains.DotNetCli;
namespace expression_lambda_regression_issue
{
public static class Program
{
[Orderer(SummaryOrderPolicy.Declared, MethodOrderPolicy.Declared)]
[MarkdownExporterAttribute.GitHub]
[GcServer(true)]
[MinColumn, MaxColumn]
[MemoryDiagnoser]
[NativeMemoryProfiler]
public class ExpressionLambdaBenchmark
{
public static readonly List<Expression> bodies = new List<Expression>();
public static readonly List<ParameterExpression[]> parameters = new List<ParameterExpression[]>();
public static readonly List<Func<Expression, IEnumerable<ParameterExpression>, LambdaExpression>> exprLambdaCallers = new List<Func<Expression, IEnumerable<ParameterExpression>, LambdaExpression>>();
[GlobalSetup]
public void Setup()
{
var coreFxAssemblyInfo = FileVersionInfo.GetVersionInfo(typeof(Regex).GetTypeInfo().Assembly.Location);
var coreClrAssemblyInfo = FileVersionInfo.GetVersionInfo(typeof(object).GetTypeInfo().Assembly.Location);
Console.WriteLine($"// CoreFx version: {coreFxAssemblyInfo.FileVersion}, location {typeof(Regex).GetTypeInfo().Assembly.Location}, product version {coreFxAssemblyInfo.ProductVersion}");
Console.WriteLine($"// CoreClr version {coreClrAssemblyInfo.FileVersion}, location {typeof(object).GetTypeInfo().Assembly.Location}, product version {coreClrAssemblyInfo.ProductVersion}");
// we need generate > 100 Expression.Lambda<TDelegate>, so I take 259
var list = typeof(List<>).Assembly.GetTypes()
.Where(t => !t.IsAbstract && !t.IsGenericType)
.Select(t => (Type: t, Property: t.GetProperties(BindingFlags.Public | BindingFlags.Instance).FirstOrDefault(x => x.CanRead && x.PropertyType == typeof(string))))
.Where(t => t.Property != null)
.ToList();
if (list.Count <= 100)
throw new Exception("Not enough for reproducing regression. Introduced cache size is 50");
for (var i = 0; i < list.Count; i++)
{
var parameter = Expression.Parameter(list[i].Type, "foo");
bodies.Add(Expression.Constant(null, typeof(object)));
parameters.Add(new ParameterExpression[] { parameter });
}
var mi = typeof(Expression).GetMethods()
.Single(m => m.IsGenericMethod && m.Name == "Lambda" && m.GetParameters()[1].ParameterType == typeof(IEnumerable<ParameterExpression>));
foreach (var item in list)
{
var exprLambdaReturnType = typeof(Func<,>).MakeGenericType(item.Type, typeof(object));
var exprLambda = mi.MakeGenericMethod(exprLambdaReturnType);
exprLambdaCallers.Add((Func<Expression, IEnumerable<ParameterExpression>, LambdaExpression>)Delegate.CreateDelegate(typeof(Func<Expression, IEnumerable<ParameterExpression>, LambdaExpression>), exprLambda));
}
}
[Benchmark]
public LambdaExpression Expression_Lambda_TDelegate()
{
LambdaExpression lambda = default;
for (int i = 0; i < 1_000; ++i)
{
for (int j = 0; j < exprLambdaCallers.Count; ++j)
{
lambda = exprLambdaCallers[j](bodies[j], parameters[j]);
}
}
return lambda;
}
}
public static void Main(string[] _)
{
const int warmup = 1;
const int invocation = 1;
const int launch = 1;
const int iteration = 1;
var config = DefaultConfig.Instance
.With(Job.Dry
.With(CsProjClassicNetToolchain.Net48)
.WithId("net48")
.WithBaseline(true)
.WithWarmupCount(warmup)
.WithLaunchCount(launch)
.WithIterationCount(iteration)
.WithInvocationCount(invocation)
)
.With(Job.Dry
.With(new CoreRunToolchain(new FileInfo(@"C:\work\dotnet\runtime-upstream\artifacts\bin\testhost\netcoreapp5.0-Windows_NT-Release-x64\shared\Microsoft.NETCore.App\5.0.0\CoreRun.exe"), true, "netcoreapp5.0", displayName: "netcoreapp50"))
.WithId("netcoreapp50")
.WithWarmupCount(warmup)
.WithLaunchCount(launch)
.WithInvocationCount(invocation)
.WithIterationCount(iteration)
)
.With(Job.Dry
.With(new CoreRunToolchain(new FileInfo(@"C:\work\dotnet\runtime-upstream\artifacts\bin\testhost\netcoreapp5.0-Windows_NT-Release-x64\shared\Microsoft.NETCore.App\5.0.0\CoreRun.exe"), true, "netcoreapp5.0", displayName: "netcoreapp50"))
.WithId("netcoreapp50")
.WithWarmupCount(warmup)
.WithLaunchCount(launch)
.WithInvocationCount(invocation)
.WithIterationCount(iteration)
.WithEnvironmentVariable("COMPlus_TieredCompilation", "0")
)
.With(Job.Dry
.With(new CoreRunToolchain(new FileInfo(@"C:\work\dotnet\runtime\artifacts\bin\testhost\netcoreapp5.0-Windows_NT-Release-x64\shared\Microsoft.NETCore.App\5.0.0\CoreRun.exe"), true, "netcoreapp5.0", displayName: "netcoreapp50_vchirikov"))
.WithId("netcoreapp50_vchirikov")
.WithWarmupCount(warmup)
.WithLaunchCount(launch)
.WithInvocationCount(invocation)
.WithIterationCount(iteration)
)
.With(Job.Dry
.With(new CoreRunToolchain(new FileInfo(@"C:\work\dotnet\runtime\artifacts\bin\testhost\netcoreapp5.0-Windows_NT-Release-x64\shared\Microsoft.NETCore.App\5.0.0\CoreRun.exe"), true, "netcoreapp5.0", displayName: "netcoreapp50_vchirikov"))
.WithId("netcoreapp50_vchirikov")
.WithWarmupCount(warmup)
.WithLaunchCount(launch)
.WithInvocationCount(invocation)
.WithIterationCount(iteration)
.WithEnvironmentVariable("COMPlus_TieredCompilation", "0")
)
;
BenchmarkRunner.Run<ExpressionLambdaBenchmark>(config);
}
}
}BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18363
AMD Ryzen 9 3900X, 1 CPU, 24 logical and 12 physical cores
.NET Core SDK=5.0.100-preview.2.20120.11
[Host] : .NET Core 5.0.0 (CoreCLR 5.0.20.11909, CoreFX 5.0.20.11909), X64 RyuJIT
Job-OOGNGR : .NET Core 5.0.0 (CoreCLR 42.42.42.42424, CoreFX 42.42.42.42424), X64 RyuJIT
Job-ZDXXHT : .NET Core 5.0.0 (CoreCLR 42.42.42.42424, CoreFX 42.42.42.42424), X64 RyuJIT
Job-AZQMMA : .NET Framework 4.8 (4.8.4121.0), X64 RyuJIT
Job-MHZNLL : .NET Core 5.0.0 (CoreCLR 42.42.42.42424, CoreFX 42.42.42.42424), X64 RyuJIT
Job-THTYGA : .NET Core 5.0.0 (CoreCLR 42.42.42.42424, CoreFX 42.42.42.42424), X64 RyuJIT
Server=True InvocationCount=1 IterationCount=1
LaunchCount=1 RunStrategy=ColdStart UnrollFactor=1
WarmupCount=1
| Method | EnvironmentVariables | Toolchain | Mean | Error | Min | Max | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | Allocated native memory | Native memory leak |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Expression_Lambda_TDelegate | COMPlus_TieredCompilation=0 | netcoreapp50 | 905.8 ms | NA | 905.8 ms | 905.8 ms | 1.97 | 20000.0000 | 1000.0000 | - | 164.21 MB | 1.13 MB | 0 MB |
| Expression_Lambda_TDelegate | COMPlus_TieredCompilation=0 | netcoreapp50_vchirikov | 172.6 ms | NA | 172.6 ms | 172.6 ms | 0.38 | 11000.0000 | - | - | 94.58 MB | 0.62 MB | 0 MB |
| Expression_Lambda_TDelegate | Empty | net48 | 459.2 ms | NA | 459.2 ms | 459.2 ms | 1.00 | 143000.0000 | - | - | 226.48 MB | 0.43 MB | 0.01 MB |
| Expression_Lambda_TDelegate | Empty | netcoreapp50 | 883.4 ms | NA | 883.4 ms | 883.4 ms | 1.92 | 20000.0000 | 1000.0000 | - | 164.14 MB | 1.55 MB | 0.22 MB |
| Expression_Lambda_TDelegate | Empty | netcoreapp50_vchirikov | 202.2 ms | NA | 202.2 ms | 202.2 ms | 0.44 | 11000.0000 | - | - | 94.73 MB | 1.02 MB | 0.22 MB |
Fix here: #32768
--
p.s. I fixed the benchmark, now it shows the cache-miss situation better