in SDAccel/userspace/src/perf.cpp [497:627]
/**
 * Read the pending device trace samples for a performance monitor and decode
 * them into traceVector.
 *
 * The raw trace words are pulled from the trace FIFO into an aligned host
 * buffer in chunks of at most 1024 32-bit words (4K bytes), then each sample's
 * first two 32-bit words are combined into one 64-bit packet and unpacked.
 *
 * @param type         Monitor type whose trace FIFO is read.
 * @param traceVector  Output. mLength is reset to 0 on entry and is set to the
 *                     number of samples read only after every FIFO read has
 *                     succeeded; decoded samples are stored in mArray at the
 *                     index of the sample they were decoded from.
 * @return Number of bytes read from the FIFO, or 0 when device profiling is
 *         disabled, no samples are available, or a FIFO read fails.
 */
size_t AwsXcl::xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector) {
  if (mLogStream.is_open()) {
    mLogStream << __func__ << ", " << std::this_thread::get_id()
               << ", " << type << ", " << &traceVector
               << ", Reading device trace stream..." << std::endl;
  }

  // Report an empty result until the whole trace has been read successfully,
  // so that a failed read never advertises samples that were not populated.
  traceVector.mLength = 0;

  if (!mIsDeviceProfiling)
    return 0;

  uint32_t numSamples = xclPerfMonGetTraceCount(type);
  if (numSamples == 0)
    return 0;

  // Only fifoReadAddress[0] is used by the reads below; the other two entries
  // are still computed for non-memory monitors to keep the helper calls intact.
  uint64_t fifoReadAddress[] = { 0, 0, 0 };
  if (type == XCL_PERF_MON_MEMORY) {
    fifoReadAddress[0] = getPerfMonFifoReadBaseAddress(type, 0) + AXI_FIFO_RDFD_AXI_FULL;
  }
  else {
    for (int i = 0; i < 3; i++)
      fifoReadAddress[i] = getPerfMonFifoReadBaseAddress(type, i) + AXI_FIFO_RDFD;
  }

  size_t size = 0;

  // Limit to max number of samples so we don't overrun trace buffer on host
  const uint32_t maxSamples = getPerfMonNumberSamples(type);
  if (numSamples > maxSamples)
    numSamples = maxSamples;
  // The host buffer below is sized from MAX_TRACE_NUMBER_SAMPLES, so clamp to
  // that as well in case the per-monitor limit is ever larger.
  if (numSamples > static_cast<uint32_t>(MAX_TRACE_NUMBER_SAMPLES))
    numSamples = static_cast<uint32_t>(MAX_TRACE_NUMBER_SAMPLES);

  const uint32_t bytesPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 8);
  const uint32_t wordsPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 32);
  const uint32_t numWords = numSamples * wordsPerSample;

  // Create trace buffer on host (requires alignment)
  const int BUFFER_BYTES = MAX_TRACE_NUMBER_SAMPLES * bytesPerSample;
  const int BUFFER_WORDS = MAX_TRACE_NUMBER_SAMPLES * wordsPerSample;
#if GCC_VERSION >= 40800
  alignas(AXI_FIFO_RDFD_AXI_FULL)uint32_t hostbuf[BUFFER_WORDS];
#else
  AlignedAllocator<uint32_t> alignedBuffer(AXI_FIFO_RDFD_AXI_FULL, BUFFER_WORDS);
  uint32_t* hostbuf = alignedBuffer.getBuffer();
#endif
  memset(hostbuf, 0, BUFFER_BYTES);

  // Iterate over chunks
  // NOTE: AXI limits this to 4K bytes per transfer
  uint32_t chunkSizeWords = 256 * wordsPerSample;
  if (chunkSizeWords > 1024) chunkSizeWords = 1024;

  // Read the trace one chunk at a time; the final chunk carries whatever is
  // left (a full chunk when the total is an exact multiple of the chunk size).
  uint32_t words = 0;
  while (words < numWords) {
    const uint32_t wordsLeft = numWords - words;
    const uint32_t wordsThisRead = (wordsLeft > chunkSizeWords) ? chunkSizeWords : wordsLeft;
    const uint32_t chunkSizeBytes = 4 * wordsThisRead;

    if (mLogStream.is_open()) {
      mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x"
                 << std::hex << fifoReadAddress[0] << " and writing it to 0x"
                 << static_cast<void *>(hostbuf + words) << std::dec << std::endl;
    }

    // traceVector.mLength is still 0 here, so a failure reports no samples.
    if (awsbwhal::unmgdPread(mUserHandle, static_cast<void *>(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0)
      return 0;

    size += chunkSizeBytes;
    words += wordsThisRead;
  }

  if (mLogStream.is_open()) {
    mLogStream << __func__ << ": done reading " << size << " bytes " << std::endl;
  }

  // All reads succeeded: publish the number of samples now held in hostbuf.
  traceVector.mLength = numSamples;

  // ******************************
  // Read & process all trace FIFOs
  // ******************************
  for (uint32_t wordnum = 0; wordnum < numSamples; wordnum++) {
    const uint32_t index = wordsPerSample * wordnum;

    // Combine the sample's first two 32-bit words into one 64-bit packet
    // (first word is the low half).
    const uint64_t temp = hostbuf[index] | (static_cast<uint64_t>(hostbuf[index + 1]) << 32);

    // All-zero packets are skipped.
    // NOTE(review): the matching mArray slot is left untouched while mLength
    // still counts it - confirm callers tolerate this.
    if (!temp)
      continue;

    // Initialize result to 0
    xclTraceResults results;
    memset(&results, 0, sizeof(xclTraceResults));

    // SDSoC Packet Format
    //   bits  0-44 : timestamp
    //   bits 45-48 : event flags (any bit set => end event, else start event)
    //   bits 49-60 : trace ID
    //   bit  61    : reserved
    //   bit  62    : overflow
    //   bit  63    : error
    results.Timestamp = temp & 0x1FFFFFFFFFFF;
    results.EventType = ((temp >> 45) & 0xF) ? XCL_PERF_MON_END_EVENT :
        XCL_PERF_MON_START_EVENT;
    results.TraceID = (temp >> 49) & 0xFFF;
    results.Reserved = (temp >> 61) & 0x1;
    results.Overflow = (temp >> 62) & 0x1;
    results.Error = (temp >> 63) & 0x1;
    results.EventID = XCL_PERF_MON_HW_EVENT;
    traceVector.mArray[wordnum] = results;

    if (mLogStream.is_open()) {
      mLogStream << " Trace sample " << std::dec << wordnum << ": ";
      mLogStream << dec2bin(uint32_t(temp >> 32)) << " " << dec2bin(uint32_t(temp & 0xFFFFFFFF));
      mLogStream << std::endl;
      mLogStream << " Timestamp : " << results.Timestamp << " ";
      mLogStream << "Event Type : " << results.EventType << " ";
      mLogStream << "slotID : " << results.TraceID << " ";
      mLogStream << "Start, Stop : " << static_cast<int>(results.Reserved) << " ";
      mLogStream << "Overflow : " << static_cast<int>(results.Overflow) << " ";
      mLogStream << "Error : " << static_cast<int>(results.Error) << " ";
      mLogStream << std::endl;
    }
  }

  return size;
} // end xclPerfMonReadTrace