How it works
BenchmarkDotNet takes the following steps to run your benchmarks:
- BenchmarkRunner generates an isolated project for each set of runtime settings and builds it in Release mode.
- Next, we take each method/job/params combination and try to measure its performance by launching the benchmark process several times (LaunchCount; see the sketch after this list).
- An invocation of the workload method is an operation. A bunch of operations is an iteration. If you have an IterationSetup method, it will be invoked before each iteration, but not between operations. We have the following types of iterations:
  - Pilot: the best operation count will be chosen.
  - OverheadWarmup, OverheadWorkload: the BenchmarkDotNet overhead will be evaluated.
  - ActualWarmup: warmup of the workload method.
  - ActualWorkload: actual measurements.
  - Result = ActualWorkload - <MedianOverhead>
- After all of the measurements, BenchmarkDotNet creates:
  - An instance of the Summary class that contains all information about the benchmark runs.
  - A set of files that contain the summary in human-readable and machine-readable formats.
  - A set of plots.
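For reference, here is how these settings map to user code. This is a minimal sketch (the SampleBenchmarks class, its members, and the values are made up for illustration); it assumes a recent BenchmarkDotNet version in which the SimpleJob attribute exposes a launchCount parameter:

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

[SimpleJob(launchCount: 3)] // LaunchCount: the benchmark process is started 3 times
public class SampleBenchmarks
{
    private byte[] data;

    [GlobalSetup] // runs once per benchmark process, before any iterations
    public void Setup() => data = new byte[1024];

    [IterationSetup] // runs before each iteration, but not between operations
    public void ResetState() { /* reset per-iteration state here */ }

    [Benchmark] // one invocation of this method is one operation
    public int Sum()
    {
        int sum = 0;
        foreach (byte b in data)
            sum += b;
        return sum;
    }
}

public class Program
{
    // BenchmarkRunner builds and launches the isolated benchmark process and returns the Summary
    public static void Main() => BenchmarkRunner.Run<SampleBenchmarks>();
}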
Pseudocode
If you don't understand our "count terminology", you might find the following pseudocode useful:
IEnumerable<Results> Run(Benchmark benchmark)
{
var toolchain = benchmark.GetToolchain();
var autoGeneratedProject = toolchain.Generate(benchmark);
var exe = toolchain.Build(autoGeneratedProject);
for (int launchIndex = 0; launchIndex < LaunchCount; launchIndex++) // LaunchCount = 1 by default
yield return ParseResults(Process.Start(exe).Output); // calls ActualRun in a separate process
}
Result ActualRun(Method method, Job job)
{
GlobalSetup();
int unrollFactor = job.Run.UnrollFactor; // 16 by default
long perfectInvocationCount = Pilot(method, unrollFactor);
WarmupStage(EMPTY_METHOD, perfectInvocationCount, unrollFactor); // EMPTY_METHOD has same return type and arguments as benchmark
var overhead = ActualStage(EMPTY_METHOD, perfectInvocationCount, unrollFactor);
WarmupStage(method, perfectInvocationCount, unrollFactor);
var result = ActualStage(method, perfectInvocationCount, unrollFactor);
var gcStats = MemoryDiagnoser.IsEnabled
    ? MeasureGcStats(method, perfectInvocationCount, unrollFactor)
    : GcStats.Empty;
GlobalCleanup();
return (result - Median(overhead), gcStats);
}
long Pilot(Method method, int unrollFactor)
{
// invokeCount is the equivalent of InnerIterationCount from xunit-performance
long invokeCount = minInvokeCount;
while (true)
{
var measurement = RunIteration(method, invokeCount, unrollFactor);
if (heuristic.IsPilotRequirementMet(measurement))
break;
invokeCount *= 2;
}
return invokeCount;
}
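If the invocation count is specified on the job explicitly, BenchmarkDotNet uses it as-is and the pilot stage is not needed. A hedged sketch using the fluent Job API of recent BenchmarkDotNet versions (MyBenchmarks and the numbers are placeholders; InvocationCount must be divisible by UnrollFactor):

using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;

var job = Job.Default
    .WithInvocationCount(1600) // operations per iteration, chosen by hand instead of by Pilot
    .WithUnrollFactor(16);     // method calls per pass of the unrolled loop in RunIteration
BenchmarkRunner.Run<MyBenchmarks>(DefaultConfig.Instance.AddJob(job));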
void WarmupStage(Method method, long invokeCount, int unrollFactor)
{
while (true)
{
var measurement = RunIteration(method, invokeCount, unrollFactor);
if (heuristic.IsWarmupRequirementMet(measurement))
break;
}
}
IEnumerable<Measurement> ActualStage(Method method, long invokeCount, int unrollFactor)
{
while (true)
{
var measurement = RunIteration(method, invokeCount, unrollFactor);
if (measurement.IsNotOutlier)
yield return measurement;
if (heuristic.IsWorkloadRequirementMet(measurement))
yield break;
}
}
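The number of warmup and workload iterations is normally chosen by these heuristics, but both can be pinned. A sketch with the fluent Job API of recent versions (5 and 20 are arbitrary values; pass the job to a config as in the previous snippet):

using BenchmarkDotNet.Jobs;

var job = Job.Default
    .WithWarmupCount(5)      // fixed number of warmup iterations
    .WithIterationCount(20); // fixed number of measured workload iterations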
// every iteration invokes the method invokeCount times in total:
// the loop below makes (invokeCount / unrollFactor) passes with unrollFactor calls per pass
Measurement RunIteration(Method method, long invokeCount, int unrollFactor)
{
IterationSetup();
MemoryCleanup();
var clock = Clock.Start();
for (long i = 0; i < invokeCount / unrollFactor; i++)
{
// we perform manual loop unrolling!!
method(); // 1st call
method(); // 2nd call
// ... (calls 3 through unrollFactor - 2 are omitted here)
method(); // (unrollFactor - 1)'th call
method(); // unrollFactor'th call
}
var clockSpan = clock.GetElapsed();
IterationCleanup();
MemoryCleanup();
return Measurement(clockSpan);
}
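For example, with invokeCount = 1600 and unrollFactor = 16, the loop above makes 100 passes and the workload method is called 1600 times, so the per-operation time reported for this iteration is clockSpan / 1600 (before the median overhead is subtracted).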
GcStats MeasureGcStats(Method method, long invokeCount, int unrollFactor)
{
// monitoring is enabled only for this single extra iteration, which is executed after the actual workload run,
// so even though we enable AppDomain monitoring for the whole (separate) benchmark process,
// it does not matter: the timing results have already been obtained!
EnableMonitoring();
IterationSetup();
var initialGcStats = GcStats.ReadInitial();
// we do NOT start any clock here, because the enabled monitoring might add some overhead,
// so we just read the GC stats and ignore the timing
// this is the last thing the process does before it dies, so the enabled monitoring is not an issue
// for the next benchmarks either, because each of them is executed in a new process
// because each of them is going to be executed in a new process
for (long i = 0; i < invokeCount / unrollFactor; i++)
{
// we perform manual loop unrolling!!
method(); // 1st call
method(); // 2nd call
// ... (calls 3 through unrollFactor - 2 are omitted here)
method(); // (unrollFactor - 1)'th call
method(); // unrollFactor'th call
}
var finalGcStats = GcStats.ReadFinal();
IterationCleanup();
return finalGcStats - initialGcStats; // the result is the difference between the stats collected after and before running the extra iteration
}
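This extra GC-stats iteration runs only when the memory diagnoser is enabled, which in user code is typically done with an attribute. A minimal sketch (the class and method names are made up):

using BenchmarkDotNet.Attributes;

[MemoryDiagnoser] // enables the GC-stats measurement and adds allocation/GC columns to the summary
public class AllocationBenchmarks
{
    [Benchmark]
    public byte[] Allocate() => new byte[1024];
}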