double measureDataPerfGivenStack()

in unittest/lib/perf.cpp [494:615]


//
// measureDataPerfGivenStack
//
// Times repeated calls of dataFn and returns a per-call cost estimate derived from
// the one-third percentile of the collected measurements.
//
// Parameters:
//  keySize     Size value forwarded to keyFn.
//  dataSize    Size value forwarded to prepFn and dataFn.
//  keyFn       Optional (may be NULL). Called once, first, with the three buffers and keySize.
//  prepFn      Optional (may be NULL). Called once before any timing, with the three buffers and dataSize.
//  dataFn      Required (it is called unconditionally). The function being timed.
//  cleanFn     Optional (may be NULL). Called once after all measurements, with the three buffers.
//  pNRuns      In/out. On entry: number of dataFn calls per measurement; must be positive.
//              On return: the number of calls per measurement that was actually used, which is
//              the entry value doubled zero or more times.
//
// Return value:
//  durations[i/3] of the sorted measurements, minus g_perfMeasurementOverhead, divided by the
//  number of runs, multiplied by g_perfScaleFactor, minus g_perfRunOverhead.
//
// Fatal conditions (via CHECK/CHECK5):
//  - *pNRuns is not positive on entry.
//  - The run count would have to exceed MAX_RUNS_PER_MEASUREMENT to get a long enough measurement.
//  - The computed result is NaN.
//
double measureDataPerfGivenStack(
                                SIZE_T keySize,
                                SIZE_T dataSize,
                                PerfKeyFn keyFn,
                                PerfDataFn prepFn,
                                PerfDataFn dataFn,
                                PerfCleanFn cleanFn,
                                int * pNRuns )
{
    // Each buffer starts at a random offset inside its own region of g_perfBuffer; the mask
    // clears the low 6 bits so every offset is a multiple of 64 bytes.
    PBYTE buf1 = g_perfBuffer + 0*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f); // cache-aligned buffers
    PBYTE buf2 = g_perfBuffer + 2*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f);
    PBYTE buf3 = g_perfBuffer + 4*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f);
    //PBYTE buf4 = g_perfBuffer + 6*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f);

    double   durations[ MEASUREMENTS_PER_RESULT ];
    //double   average[ MEASUREMENTS_PER_RESULT ]; // Helpful when debugging

    if( keyFn != NULL )
    {
        (*keyFn)( buf1, buf2, buf3, keySize );
    }

    int runs = *pNRuns;

    //
    // Reject a non-positive run count up front.
    // Zero stays zero when doubled, so a too-short measurement could never be corrected and the
    // final division by runs would divide by zero. A negative count would skip the timed loop
    // entirely and then be left-shifted.
    //
    CHECK( runs > 0, "Number of runs must be positive" );

    int i=0;    // number of accepted measurements stored in durations[]
    ULONGLONG time0, time1, time2;

    if( prepFn != NULL )
    {
        (*prepFn) (buf1, buf2, buf3, dataSize );
    }

    (*dataFn)( buf1, buf2, buf3, dataSize ); // Run data function once to prime caches
    ULONGLONG loopStart = GET_PERF_CLOCK();

    // Initial fixed-time-loop reading; it serves as the "before" value of the first measurement.
    time0 = GET_PERF_CLOCK();
    FIXED_TIME_LOOP();
    time1 = GET_PERF_CLOCK();
    double fixedBefore = (double) (time1 - time0);

    // Try to get MEASUREMENTS_PER_RESULT measurements, but limit to g_largeMeasurementClockTime cycles if at least one measurement has been made
    while( i < MEASUREMENTS_PER_RESULT && (i < 1 || (GET_PERF_CLOCK() - loopStart) < g_largeMeasurementClockTime) )
    {
        // Measure fixed time loop before and after loop of function of interest
        // We use this both to ensure that the timing has not changed dramatically during the loop of the function of interest
        // and to scale different measurements at different times so they are more directly comparable
        time0 = GET_PERF_CLOCK();
        for( int j=0; j<runs; j++ )
        {
            (*dataFn)( buf1, buf2, buf3, dataSize );
        }
        time1 = GET_PERF_CLOCK();
        FIXED_TIME_LOOP();
        time2 = GET_PERF_CLOCK();

        double fixedAfter = (double)  (time2 - time1);
        // Scale by expected fixed-loop cycles over the average of the two observed fixed-loop timings.
        double measurementScaleFactor = ((double) FIXED_TIME_LOOP_EXPECTED_CYCLES * 2) / (fixedBefore + fixedAfter);
        // Larger-over-smaller ratio of the two fixed-loop timings, so it is always >= 1.
        double fixedRatio = fixedBefore > fixedAfter ? fixedBefore / fixedAfter : fixedAfter / fixedBefore;
        fixedBefore = fixedAfter; // now use this after measurement as the next before measurement

        if( g_perfClockScaling && fixedRatio > 1.01f )
        {
            // Something changed in timing between before and after, rerun this run
            continue;
        }

        ULONGLONG duration = time1 - time0;
        if( duration < g_minMeasurementClockTime )
        {
            //
            // The measurement was too short, restart & double the # runs we do.
            // All measurements collected so far are discarded (i is reset) and the time limit restarts.
            //
            i = 0;
            loopStart = GET_PERF_CLOCK();
            runs <<= 1;
            CHECK( runs <= MAX_RUNS_PER_MEASUREMENT, "Measurement too fast" );
            continue;
        }

        durations[i] = (double) duration;
        if( g_perfClockScaling )
        {
            durations[i] *= measurementScaleFactor;
        }

        //average[i+1] = fixedAverage; // Helpful when debugging

        ++i;
    }

    /*
    // Helpful when debugging
    print( " mdpgs[%i]", runs);
    char c = '[';
    for( int j=0; j<i; j++ )
    {
        print( "%c(%f,%f)", c, durations[j], average[j] / g_fixedTimeLoopRuns );
        c = ',';
    }
    print( "]\n" );
    */

    // Only the first i entries of durations[] hold measurements; the loop above exits with i >= 1.
    qsort( durations, i, sizeof( durations[0] ), compareDouble );

    //
    // We return the one-third percentile point to compensate for expected slow-downs.
    //
    double res = (double) durations[i/3];
    res -= g_perfMeasurementOverhead;
    res /= runs;
    res *= g_perfScaleFactor;
    res -= g_perfRunOverhead;

    // Report the run count actually used back to the caller.
    *pNRuns = runs;

    if( cleanFn != NULL )
    {
        (*cleanFn)( buf1, buf2, buf3 );
    }

    CHECK5( !isnan(res), "NaN result for measureDataPerfGivenStack res: durations[%d/3]: %f runs: %d", i, (double) durations[i/3], runs );
    return res;
}