Table of Contents

How it works

BenchmarkDotNet performs the following steps to run your benchmarks:

  1. BenchmarkRunner generates an isolated project for each set of runtime settings and builds it in Release mode.
  2. Next, we take each method/job/params combination and try to measure its performance by launching the benchmark process several times (LaunchCount).
  3. An invocation of the workload method is an operation. A batch of operations is an iteration. If you have an IterationSetup method, it will be invoked before each iteration, but not between operations. We have the following types of iterations:
    • Pilot: The best operation count will be chosen.
    • OverheadWarmup, OverheadWorkload: BenchmarkDotNet overhead will be evaluated.
    • ActualWarmup: Warmup of the workload method.
    • ActualWorkload: Actual measurements.
    • Result = ActualWorkload - <MedianOverhead>
  4. After all of the measurements, BenchmarkDotNet creates:
    • An instance of the Summary class that contains all information about benchmark runs.
    • A set of files that contain the summary in human-readable and machine-readable formats.
    • A set of plots.

Pseudocode

If you don't understand our "count terminology", then you might find the following pseudocode useful:

// Generates and builds the benchmark project once, then launches the resulting
// executable LaunchCount times, yielding the parsed output of each launch.
IEnumerable<Results> Run(Benchmark benchmark)
{
    var toolchain = benchmark.GetToolchain();

    var autoGeneratedProject = toolchain.Generate(benchmark);
    var exe = toolchain.Build(autoGeneratedProject);

    // LaunchCount is a number (1 by default), so launches are counted rather than enumerated
    for (int launchIndex = 0; launchIndex < LaunchCount; launchIndex++)
    {
        yield return ParseResults(Process.Start(exe).Output); // calls ActualRun in a separate process
    }
}

// Runs every stage for one benchmark method inside the benchmark process:
// pilot, overhead warmup/workload, actual warmup/workload and, when the memory
// diagnoser is enabled, one extra iteration that only collects GC statistics.
// Returns the workload measurements with the median overhead subtracted,
// together with the GC statistics.
Result ActualRun(Method method, Job job)
{
    GlobalSetup();

    int unrollFactor = job.Run.UnrollFactor; // 16 by default

    long perfectInvocationCount = Pilot(method, unrollFactor);

    // Overhead stages: the same loop is run against an empty method
    Warmup(EMPTY_METHOD, perfectInvocationCount, unrollFactor); // EMPTY_METHOD has same return type and arguments as benchmark
    var overhead = Workload(EMPTY_METHOD, perfectInvocationCount, unrollFactor);

    // Actual stages: the benchmarked method itself
    Warmup(method, perfectInvocationCount, unrollFactor);
    var result = Workload(method, perfectInvocationCount, unrollFactor);

    // declared outside the `if` so that it is still in scope at the return statement
    GcStats gcStats = default;
    if (MemoryDiagnoser.IsEnabled)
    {
        gcStats = MeasureGcStats(method, perfectInvocationCount, unrollFactor);
    }

    GlobalCleanup();

    return (result - Median(overhead), gcStats);
}

// Finds the invocation count to use for all later stages: starting from
// minInvokeCount, the count is doubled until a single iteration satisfies
// the pilot heuristic, and that count is returned.
long Pilot(Method method, int unrollFactor)
{
    // the invocation count is the equivalent of InnerIterationCount from xunit-performance
    for (long candidateCount = minInvokeCount; ; candidateCount *= 2)
    {
        var sample = RunIteration(method, candidateCount, unrollFactor);

        if (heuristic.IsPilotRequirementMet(sample))
            return candidateCount;
    }
}

// Repeats iterations of the given method until the warmup heuristic accepts
// the latest measurement. The measurements themselves are discarded.
void Warmup(Method method, long invokeCount, int unrollFactor)
{
    Measurement latest;

    do
    {
        latest = RunIteration(method, invokeCount, unrollFactor);
    }
    while (!heuristic.IsWarmupRequirementMet(latest));
}

// Runs iterations of the given method and yields every measurement that is not
// an outlier. The sequence ends once the workload heuristic accepts the latest
// measurement; that check runs after each iteration, outlier or not.
IEnumerable<Measurement> Workload(Method method, long invokeCount, int unrollFactor)
{
    while (true)
    {
        var measurement = RunIteration(method, invokeCount, unrollFactor);

        if (measurement.IsNotOutlier)
            yield return measurement;

        if (heuristic.IsWorkloadRequirementMet(measurement))
            yield break;
    }
}

// One iteration: the unrolled loop body runs (invokeCount / unrollFactor) times
// and each pass calls the method unrollFactor times, so the method is called
// invokeCount times in total when invokeCount is a multiple of unrollFactor.
// Only the loop itself is timed; setup and cleanup happen outside the clock.
Measurement RunIteration(Method method, long invokeCount, long unrollFactor)
{
    IterationSetup();
    MemoryCleanup();

    var stopwatch = Clock.Start();

    long pass = 0;
    while (pass < invokeCount / unrollFactor)
    {
        // the loop body is unrolled by hand!
        method(); // call #1
        method(); // call #2

        method(); // call #(unrollFactor - 1)
        method(); // call #unrollFactor

        pass++;
    }

    var elapsed = stopwatch.GetElapsed();

    IterationCleanup();
    MemoryCleanup();

    return Measurement(elapsed);
}

// Runs one extra, untimed iteration of the method with monitoring enabled and
// returns the difference between the GC statistics read after and before it.
GcStats MeasureGcStats(Method method, long invokeCount, long unrollFactor)
{
    // Monitoring is enabled only after the actual workload run, for this single iteration executed at the end.
    // So even if AppDomain monitoring is enabled in the separate process,
    // it does not matter, because the results have already been obtained!
    EnableMonitoring();

    IterationSetup();

    var initialGcStats = GcStats.ReadInitial();

    // We do NOT start any clock here, because the enabled monitoring might have some overhead,
    // so we just get the GC stats and ignore the timing.
    // This is the last thing the process does before it dies, so the enabled monitoring is not an issue
    // for the next benchmarks either, because each of them is going to be executed in a new process.

    for (long i = 0; i < invokeCount / unrollFactor; i++)
    {
        // we perform manual loop unrolling!!
        method(); // 1st call
        method(); // 2nd call

        method(); // (unrollFactor - 1)'th call
        method(); // unrollFactor'th call
    }

    var finalGcStats = GcStats.ReadFinal();

    IterationCleanup();

    return finalGcStats - initialGcStats; // the result is the difference between the stats collected after and before running the extra iteration
}