in unittest/lib/perf.cpp [494:615]
//
// measureDataPerfGivenStack
//
// Times repeated calls of dataFn and returns a per-call cost estimate.
//
// Sequence, as performed below:
//  1. Pick three working buffers inside g_perfBuffer at randomised offsets.
//  2. Call keyFn (if non-NULL) with keySize, then prepFn (if non-NULL) with dataSize.
//  3. Call dataFn once untimed, then collect up to MEASUREMENTS_PER_RESULT timed
//     samples, each sample covering 'runs' back-to-back calls of dataFn.
//  4. Sort the samples, pick the one at index i/3, and convert it to a per-call figure.
//  5. Call cleanFn (if non-NULL).
//
// Parameters:
//  keySize  - forwarded to keyFn only.
//  dataSize - forwarded to prepFn and dataFn.
//  keyFn    - optional; called once before anything is timed.
//  prepFn   - optional; called once after keyFn, before the priming call of dataFn.
//  dataFn   - required; it is called without a NULL check. This is the function being timed.
//  cleanFn  - optional; called once after all samples have been taken.
//  pNRuns   - in/out. On entry, the number of dataFn calls per sample to start with.
//             On exit, the (possibly doubled) number that was actually used.
//
// Returns:
//  (sample - g_perfMeasurementOverhead) / runs * g_perfScaleFactor - g_perfRunOverhead.
//  The unit of the result depends on GET_PERF_CLOCK and g_perfScaleFactor, which are
//  defined elsewhere.
//
// NOTE(review): if *pNRuns is <= 0 on entry, 'runs <<= 1' never yields a positive run
// count, so the "measurement too short" restart path below would not converge.
// Callers presumably always pass a value >= 1 - confirm.
//
double measureDataPerfGivenStack(
SIZE_T keySize,
SIZE_T dataSize,
PerfKeyFn keyFn,
PerfDataFn prepFn,
PerfDataFn dataFn,
PerfCleanFn cleanFn,
int * pNRuns )
{
// Each buffer starts in its own region of g_perfBuffer (regions begin 2*PERF_BUFFER_SIZE apart)
// at a random offset whose low 6 bits are cleared, i.e. a multiple of 64 bytes.
// Presumably g_rng.sizet( n ) returns a value below n, so each buffer keeps at least
// PERF_BUFFER_SIZE bytes of room before the next region - confirm against the RNG helper.
PBYTE buf1 = g_perfBuffer + 0*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f); // cache-aligned buffers
PBYTE buf2 = g_perfBuffer + 2*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f);
PBYTE buf3 = g_perfBuffer + 4*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f);
//PBYTE buf4 = g_perfBuffer + 6*PERF_BUFFER_SIZE + (g_rng.sizet( PERF_BUFFER_SIZE ) & ~0x3f);
// One slot per accepted sample; only the first i entries are valid once the loop ends.
double durations[ MEASUREMENTS_PER_RESULT ];
//double average[ MEASUREMENTS_PER_RESULT ]; // Helpful when debugging
// Optional key setup; this is the only consumer of keySize.
if( keyFn != NULL )
{
(*keyFn)( buf1, buf2, buf3, keySize );
}
// runs = number of dataFn calls per sample; i = number of samples accepted so far.
int runs = *pNRuns;
int i=0;
ULONGLONG time0, time1, time2;
// Optional untimed preparation step.
if( prepFn != NULL )
{
(*prepFn) (buf1, buf2, buf3, dataSize );
}
(*dataFn)( buf1, buf2, buf3, dataSize ); // Run data function once to prime caches
// loopStart bounds the total time spent collecting samples (see the while condition).
ULONGLONG loopStart = GET_PERF_CLOCK();
// Initial reference timing of the fixed time loop; it serves as the "before" value
// for the first sample.
time0 = GET_PERF_CLOCK();
FIXED_TIME_LOOP();
time1 = GET_PERF_CLOCK();
double fixedBefore = (double) (time1 - time0);
// Try to get MEASUREMENTS_PER_RESULT measurements, but limit to g_largeMeasurementClockTime cycles if at least one measurement has been made
// The 'i < 1' term guarantees at least one accepted sample before the time limit can end the loop.
while( i < MEASUREMENTS_PER_RESULT && (i < 1 || (GET_PERF_CLOCK() - loopStart) < g_largeMeasurementClockTime) )
{
// Measure fixed time loop before and after loop of function of interest
// We use this both to ensure that the timing has not changed dramatically during the loop of the function of interest
// and to scale different measurements at different times so they are more directly comparable
time0 = GET_PERF_CLOCK();
for( int j=0; j<runs; j++ )
{
(*dataFn)( buf1, buf2, buf3, dataSize );
}
// time1 ends the dataFn sample and also starts the "after" fixed time loop timing.
time1 = GET_PERF_CLOCK();
FIXED_TIME_LOOP();
time2 = GET_PERF_CLOCK();
double fixedAfter = (double) (time2 - time1);
// Ratio of the expected fixed-loop time to the mean of the two observed fixed-loop times.
double measurementScaleFactor = ((double) FIXED_TIME_LOOP_EXPECTED_CYCLES * 2) / (fixedBefore + fixedAfter);
// Larger of the two fixed-loop timings divided by the smaller one.
double fixedRatio = fixedBefore > fixedAfter ? fixedBefore / fixedAfter : fixedAfter / fixedBefore;
fixedBefore = fixedAfter; // now use this after measurement as the next before measurement
if( g_perfClockScaling && fixedRatio > 1.01f )
{
// Something changed in timing between before and after, rerun this run
// The sample is dropped: i is not advanced and nothing is stored in durations.
continue;
}
ULONGLONG duration = time1 - time0;
if( duration < g_minMeasurementClockTime )
{
//
// The measurement was too short, restart & double the # runs we do.
//
// All samples collected so far are discarded (they used the old run count) and the
// time budget is restarted.
i = 0;
loopStart = GET_PERF_CLOCK();
runs <<= 1;
CHECK( runs <= MAX_RUNS_PER_MEASUREMENT, "Measurement too fast" );
continue;
}
// Accept the sample, scaled by the fixed-loop factor when clock scaling is enabled.
durations[i] = (double) duration;
if( g_perfClockScaling )
{
durations[i] *= measurementScaleFactor;
}
//average[i+1] = fixedAverage; // Helpful when debugging
++i;
}
/*
// Helpful when debugging
print( " mdpgs[%i]", runs);
char c = '[';
for( int j=0; j<i; j++ )
{
print( "%c(%f,%f)", c, durations[j], average[j] / g_fixedTimeLoopRuns );
c = ',';
}
print( "]\n" );
*/
// Sort the i accepted samples; the order is defined by compareDouble (presumably ascending - confirm).
qsort( durations, i, sizeof( durations[0] ), compareDouble );
//
// We return the one-third percentile point to compensate for expected slow-downs.
//
double res = (double) durations[i/3];
// Convert the chosen sample into a per-call figure: remove the per-sample overhead, divide
// by the number of calls in the sample, apply the global scale factor, then remove the
// per-call overhead.
res -= g_perfMeasurementOverhead;
res /= runs;
res *= g_perfScaleFactor;
res -= g_perfRunOverhead;
// Report the run count actually used back to the caller.
*pNRuns = runs;
// Optional cleanup; done before the NaN check below.
if( cleanFn != NULL )
{
(*cleanFn)( buf1, buf2, buf3 );
}
CHECK5( !isnan(res), "NaN result for measureDataPerfGivenStack res: durations[%d/3]: %f runs: %d", i, (double) durations[i/3], runs );
return res;
}