in Benchmarks/NetPipe/netmpi.c [112:531]
/*
 * Entry point of the NetPIPE MPI driver.
 *
 * Initialises MPI, insists on exactly two processes, parses the command line
 * (-time, -reps, -start, -end, -align, -onebuffer, -nocache, -headtohead,
 * -mb, -noprint, -pert, -out), measures latency and synchronisation time and
 * then walks message sizes from `start` to `end`.  Every size is probed
 * together with nearby "perturbed" sizes; each probe is timed over TRIALS
 * trials of `nrepeat` transfers and the best time is stored in bwdata[].
 * The transmitting side (args.tr) writes the results to the output file:
 * every probe when -pert is given, otherwise the best probe of each group.
 *
 * Returns 0 on normal completion; calls exit() on option, file or early
 * allocation errors.
 */
int main(int argc, char *argv[])
{
    FILE *out = 0;                  /* Output data file */
    char s[255] = "";               /* Output file name; zero-initialised because
                                       only rank 0 receives the default below */
    char *memtmp;                   /* Unaligned start of args.buff for this probe */
    char *memtmp1;                  /* Unaligned start of args.buff1 for this probe */
    char *trial_start;              /* Aligned start of args.buff for every trial */
    char *onebuf = NULL;            /* -onebuffer: base of the single buff block */
    char *onebuf1 = NULL;           /* -onebuffer: base of the single buff1 block */
    size_t bufbytes;                /* Bytes needed behind args.buff */
    size_t buf1bytes;               /* Bytes needed behind args.buff1 */
    int i, j, n, nq,                /* Loop indices */
        bufoffset = 0,              /* Offset past the alignment boundary */
        bufalign = 16*1024,         /* Boundary to align buffer to */
        nrepeat,                    /* Number of times to do the transmission */
        nzero = 0,
        len,                        /* Number of bytes to be transmitted */
        inc = 1,                    /* Increment value */
        detailflag = 0,             /* Set to examine the signature curve detail */
        pert,                       /* Perturbation value */
        ipert,                      /* Index of the perturbation loop */
        start = 0,                  /* Starting value for signature curve */
        end = MAXINT,               /* Ending value for signature curve */
        streamopt = 0,              /* Streaming mode flag */
        printopt = 1;               /* Debug print statements flag */
    int one_buffer = 0;
    int onebuffersize = 100*1024*1024;
    int quit = 0;
    ArgStruct args;                 /* Arguments for all the calls */
    double t, t0, t1, t2,           /* Time variables */
        tlast,                      /* Time for the last transmission */
        tzero = 0,
        latency,                    /* Network message latency */
        synctime;                   /* Network synchronization time */
    Data *bwdata;                   /* Bandwidth curve data */
    BOOL bNoCache = FALSE;
    BOOL bHeadToHead = FALSE;
    BOOL bSavePert = FALSE;
    BOOL bUseMegaBytes = FALSE;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &g_nNproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &g_nIproc);

    /* The benchmark is strictly point-to-point: anything but two ranks just
       prints the usage text and leaves. */
    if (g_nNproc != 2)
    {
        if (g_nIproc == 0)
            PrintOptions();
        MPI_Finalize();
        exit(0);
    }

    GetOptDouble(&argc, &argv, "-time", &g_STOPTM);
    GetOptInt(&argc, &argv, "-reps", &g_NSAMP);
    GetOptInt(&argc, &argv, "-start", &start);
    GetOptInt(&argc, &argv, "-end", &end);
    GetOptInt(&argc, &argv, "-align", &bufalign);
    one_buffer = GetOptInt(&argc, &argv, "-onebuffer", &onebuffersize);
    if (one_buffer)
    {
        if (onebuffersize < 1)
        {
            one_buffer = 0;
        }
        else
        {
            /* Leave room for the alignment padding applied later. */
            onebuffersize += bufalign;
        }
    }
    bNoCache = GetOpt(&argc, &argv, "-nocache");
    bHeadToHead = GetOpt(&argc, &argv, "-headtohead");
    bUseMegaBytes = GetOpt(&argc, &argv, "-mb");
    if (GetOpt(&argc, &argv, "-noprint"))
        printopt = 0;
    bSavePert = GetOpt(&argc, &argv, "-pert");

    /* The main loop only tests n < g_NSAMP between perturbation groups, and a
       group can record up to three probes (-PERT, 0, +PERT), so the highest
       index that can be written is g_NSAMP + 1. */
    bwdata = malloc(((size_t)(g_NSAMP > 0 ? g_NSAMP : 0) + 2) * sizeof *bwdata);
    if (bwdata == NULL)
    {
        fprintf(stdout,"Couldn't allocate memory\n");
        fflush(stdout);
        exit(1);
    }

    if (g_nIproc == 0)
        strcpy(s, "Netpipe.out");
    GetOptString(&argc, &argv, "-out", s);

    if (start > end)
    {
        fprintf(stdout, "Start MUST be LESS than end\n");
        exit(420132);
    }

    args.nbuff = TRIALS;
    Setup(&args);
    Establish(&args);

    if (args.tr)
    {
        if ((out = fopen(s, "w")) == NULL)
        {
            fprintf(stdout,"Can't open %s for output\n", s);
            exit(1);
        }
    }

    latency = TestLatency(&args);
    synctime = TestSyncTime(&args);

    /* Both sides continue with the latency value held by the transmitter. */
    if (args.tr)
    {
        SendTime(&args, &latency, &nzero);
    }
    else
    {
        RecvTime(&args, &latency, &nzero);
    }

    if (args.tr && printopt)
    {
        printf("Latency: %0.9f\n", latency);
        fflush(stdout);
        printf("Sync Time: %0.9f\n", synctime);
        fflush(stdout);
        printf("Now starting main loop\n");
        fflush(stdout);
    }

    tlast = latency;
    inc = (start > 1 && !detailflag) ? start/2: inc;
    args.bufflen = start;

    if (one_buffer)
    {
        /* -onebuffer: allocate both buffers once and reuse them for every
           probe.  The base pointers are kept so that each probe can restart
           from the beginning of the block and so they can be freed at exit. */
        onebuf = (char *)malloc((size_t)onebuffersize);
        onebuf1 = (char *)malloc((size_t)onebuffersize);
        if (onebuf == NULL || onebuf1 == NULL)
        {
            fprintf(stdout,"Couldn't allocate memory\n");
            fflush(stdout);
            exit(1);
        }
        args.buff = onebuf;
        args.buff1 = onebuf1;
    }

    /* Main loop of benchmark */
    for (nq = n = 0, len = start;
         n < g_NSAMP && tlast < g_STOPTM && len <= end && !quit;
         len = len + inc, nq++)
    {
        /* After the first few sizes the step doubles on every odd pass. */
        if (nq > 2 && !detailflag)
            inc = ((nq % 2))? inc + inc: inc;

        /* This is a perturbation loop to test nearby values */
        for (ipert = 0, pert = (!detailflag && inc > PERT + 1)? -PERT: 0;
             pert <= PERT && !quit;
             ipert++, n++,
             pert += (!detailflag && inc > PERT + 1)? PERT: PERT + 1)
        {
            /* Calculate how many times to repeat the experiment.  The
               transmitter decides (based on the previous probe's size and
               time) and passes the count to the receiver. */
            if (args.tr)
            {
                if (args.bufflen == 0)
                    nrepeat = g_LATENCYREPS;
                else
                    nrepeat = (int)(MAX((RUNTM / ((double)args.bufflen /
                        (args.bufflen - inc + 1.0) * tlast)), TRIALS));
                SendTime(&args, &tzero, &nrepeat);
            }
            else
            {
                nrepeat = 1; /* Just needs to be greater than zero */
                RecvTime(&args, &tzero, &nrepeat);
            }

            /* Work out the buffer sizes for this probe.  The arithmetic is
               done in size_t so a large bufflen * nrepeat cannot overflow
               int before it reaches malloc or the size check.  With
               -nocache every repetition uses its own bufflen-sized slice of
               args.buff; args.buff1 always holds a single message. */
            args.bufflen = len + pert;
            buf1bytes = (size_t)args.bufflen + (size_t)bufalign;
            if (bNoCache)
                bufbytes = (size_t)args.bufflen * (size_t)nrepeat + (size_t)bufalign;
            else
                bufbytes = buf1bytes;

            if (one_buffer)
            {
                if (bufbytes > (size_t)onebuffersize)
                {
                    fprintf(stdout, "Exceeded user specified buffer size\n");
                    fflush(stdout);
                    quit = 1;
                    break;
                }
                /* Restart from the block bases: args.buff was advanced by the
                   previous probe and must not creep towards the end of the
                   allocation. */
                args.buff = onebuf;
                args.buff1 = onebuf1;
            }
            else
            {
                args.buff = (char *)malloc(bufbytes);
                args.buff1 = (args.buff != NULL) ? (char *)malloc(buf1bytes) : (char *)NULL;
                if (args.buff == (char *)NULL || args.buff1 == (char *)NULL)
                {
                    fprintf(stdout,"Couldn't allocate memory\n");
                    fflush(stdout);
                    /* Release the half of the pair that did succeed and stop
                       the benchmark; larger sizes would only fail again. */
                    free(args.buff);
                    args.buff = (char *)NULL;
                    quit = 1;
                    break;
                }
            }

            /* Possibly align the data buffers.  memtmp/memtmp1 keep the
               unaligned addresses for free(). */
            memtmp = args.buff;
            memtmp1 = args.buff1;
            if (bufalign != 0)
            {
                args.buff += (bufalign - ((MPI_Aint)args.buff % bufalign) + bufoffset) % bufalign;
                args.buff1 += (bufalign - ((MPI_Aint)args.buff1 % bufalign) + bufoffset) % bufalign;
            }
            /* With -nocache every trial walks args.buff forward, so remember
               the aligned start and rewind to it before each trial. */
            trial_start = args.buff;

            if (args.tr && printopt)
            {
                fprintf(stdout,"%3d: %9d bytes %4d times --> ",
                        n, args.bufflen, nrepeat);
                fflush(stdout);
            }

            /* Finally, we get to transmit or receive and time */
            if (args.tr)
            {
                bwdata[n].t = LONGTIME;
                t2 = t1 = 0;
                for (i = 0; i < TRIALS; i++)
                {
                    if (bNoCache)
                    {
                        args.buff = trial_start;
                    }
                    Sync(&args);
                    t0 = When();
                    for (j = 0; j < nrepeat; j++)
                    {
                        if (bHeadToHead)
                            SendRecvData(&args);
                        else
                        {
                            SendData(&args);
                            if (!streamopt)
                            {
                                RecvData(&args);
                            }
                        }
                        if (bNoCache)
                        {
                            /* Next repetition uses a fresh slice. */
                            args.buff += args.bufflen;
                        }
                    }
                    /* Per-message time; a round trip counts as two messages. */
                    t = (When() - t0)/((1 + !streamopt) * nrepeat);
                    if (!streamopt)
                    {
                        t2 += t*t;
                        t1 += t;
                        bwdata[n].t = MIN(bwdata[n].t, t);
                    }
                }
                /* The side that measured publishes the best time. */
                if (!streamopt)
                    SendTime(&args, &bwdata[n].t, &nzero);
                else
                    RecvTime(&args, &bwdata[n].t, &nzero);
                if (!streamopt)
                    bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS;
            }
            else
            {
                bwdata[n].t = LONGTIME;
                t2 = t1 = 0;
                for (i = 0; i < TRIALS; i++)
                {
                    if (bNoCache)
                    {
                        args.buff = trial_start;
                    }
                    Sync(&args);
                    t0 = When();
                    for (j = 0; j < nrepeat; j++)
                    {
                        if (bHeadToHead)
                            SendRecvData(&args);
                        else
                        {
                            RecvData(&args);
                            if (!streamopt)
                                SendData(&args);
                        }
                        if (bNoCache)
                        {
                            /* Next repetition uses a fresh slice. */
                            args.buff += args.bufflen;
                        }
                    }
                    t = (When() - t0)/((1 + !streamopt) * nrepeat);
                    if (streamopt)
                    {
                        t2 += t*t;
                        t1 += t;
                        bwdata[n].t = MIN(bwdata[n].t, t);
                    }
                }
                if (streamopt)
                    SendTime(&args, &bwdata[n].t, &nzero);
                else
                    RecvTime(&args, &bwdata[n].t, &nzero);
                if (streamopt)
                    bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS;
            }

            tlast = bwdata[n].t;
            bwdata[n].bits = args.bufflen * CHARSIZE;
            bwdata[n].bps = bwdata[n].bits / (bwdata[n].t * 1024 * 1024);
            bwdata[n].repeat = nrepeat;

            if (args.tr)
            {
                if (bSavePert)
                {
                    /* -pert: record every probe, not just the best of the group. */
                    if (bUseMegaBytes)
                        fprintf(out,"%d\t%f\t%0.9f\n", bwdata[n].bits / 8, bwdata[n].bps / 8, bwdata[n].t);
                    else
                        fprintf(out,"%d\t%f\t%0.9f\n", bwdata[n].bits / 8, bwdata[n].bps, bwdata[n].t);
                    fflush(out);
                }
            }

            if (!one_buffer)
            {
                free(memtmp);
                free(memtmp1);
            }

            if (args.tr && printopt)
            {
                if (bUseMegaBytes)
                    fprintf(stdout," %6.2f MBps in %0.9f sec\n", bwdata[n].bps / 8, tlast);
                else
                    fprintf(stdout," %6.2f Mbps in %0.9f sec\n", bwdata[n].bps, tlast);
                fflush(stdout);
            }
        } /* End of perturbation loop */

        /* If we didn't save all of the perturbation probes, find the fastest
           one of this group and save it.  ipert counts the probes actually
           recorded; when the group was abandoned before its first probe there
           is nothing to report (and bwdata[n-1] may not exist). */
        if (!bSavePert && args.tr && ipert > 0)
        {
            int index = 1;
            double dmax = bwdata[n-1].bps;
            for (; ipert > 1; ipert--)
            {
                if (bwdata[n-ipert].bps > dmax)
                {
                    index = ipert;
                    dmax = bwdata[n-ipert].bps;
                }
            }
            if (bUseMegaBytes)
                fprintf(out,"%d\t%f\t%0.9f\n", bwdata[n-index].bits / 8, bwdata[n-index].bps / 8, bwdata[n-index].t);
            else
                fprintf(out,"%d\t%f\t%0.9f\n", bwdata[n-index].bits / 8, bwdata[n-index].bps, bwdata[n-index].t);
            fflush(out);
        }
    } /* End of main loop */

    if (args.tr)
        fclose(out);

    CleanUp(&args);
    /* Both are NULL unless -onebuffer was active. */
    free(onebuf);
    free(onebuf1);
    free(bwdata);
    return 0;
}