size_t AwsXcl::xclPerfMonReadTrace()

in SDAccel/userspace/src/perf.cpp [497:627]


  /**
   * Drain the device trace FIFO for the given monitor type and decode the
   * samples into traceVector.
   *
   * @param type         Performance monitor whose trace FIFO is read.
   * @param traceVector  Output. mLength is reset to 0 on entry and is set to
   *                     the number of samples only after every FIFO read has
   *                     succeeded. Samples whose first 64 bits are all zero
   *                     are skipped, leaving the matching mArray slot
   *                     untouched.
   * @return Number of bytes read from the FIFO; 0 when device profiling is
   *         off, when there are no samples, or when a FIFO read fails.
   */
  size_t AwsXcl::xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector) {
    if (mLogStream.is_open()) {
      mLogStream << __func__ << ", " << std::this_thread::get_id()
        << ", " << type << ", " << &traceVector
        << ", Reading device trace stream..." << std::endl;
    }

    traceVector.mLength = 0;
    if (!mIsDeviceProfiling)
      return 0;

    uint32_t numSamples = xclPerfMonGetTraceCount(type);
    if (numSamples == 0)
      return 0;

    // Only fifoReadAddress[0] is used by the reads below; the other entries
    // are still resolved exactly as before.
    uint64_t fifoReadAddress[] = { 0, 0, 0 };
    if (type == XCL_PERF_MON_MEMORY) {
      fifoReadAddress[0] = getPerfMonFifoReadBaseAddress(type, 0) + AXI_FIFO_RDFD_AXI_FULL;
    }
    else {
      for (int i = 0; i < 3; i++)
        fifoReadAddress[i] = getPerfMonFifoReadBaseAddress(type, i) + AXI_FIFO_RDFD;
    }

    size_t size = 0;

    // Limit to max number of samples so we don't overrun trace buffer on host
    uint32_t maxSamples = getPerfMonNumberSamples(type);
    numSamples = (numSamples > maxSamples) ? maxSamples : numSamples;
    // The host buffer below is sized for MAX_TRACE_NUMBER_SAMPLES, so never
    // accept more than that regardless of the per-type limit.
    if (numSamples > static_cast<uint32_t>(MAX_TRACE_NUMBER_SAMPLES))
      numSamples = static_cast<uint32_t>(MAX_TRACE_NUMBER_SAMPLES);

    const uint32_t bytesPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 8);
    const uint32_t wordsPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 32);
    uint32_t numWords = numSamples * wordsPerSample;

    // Create trace buffer on host (requires alignment)
    const int BUFFER_BYTES = MAX_TRACE_NUMBER_SAMPLES * bytesPerSample;
    const int BUFFER_WORDS = MAX_TRACE_NUMBER_SAMPLES * wordsPerSample;
#if GCC_VERSION >= 40800
    alignas(AXI_FIFO_RDFD_AXI_FULL)uint32_t hostbuf[BUFFER_WORDS];
#else
    AlignedAllocator<uint32_t> alignedBuffer(AXI_FIFO_RDFD_AXI_FULL, BUFFER_WORDS);
    uint32_t* hostbuf = alignedBuffer.getBuffer();
#endif
    memset(static_cast<void *>(hostbuf), 0, BUFFER_BYTES);

    // Read the trace a chunk at a time.
    // NOTE: AXI limits this to 4K bytes per transfer
    uint32_t chunkSizeWords = 256 * wordsPerSample;
    if (chunkSizeWords > 1024) chunkSizeWords = 1024;

    uint32_t words = 0;
    while (words < numWords) {
      // Full chunks first; the final transfer carries whatever is left
      // (at most one chunk).
      const uint32_t remainingWords = numWords - words;
      const uint32_t wordsThisRead =
        (remainingWords < chunkSizeWords) ? remainingWords : chunkSizeWords;
      const uint32_t chunkSizeBytes = 4 * wordsThisRead;

      if (mLogStream.is_open()) {
        mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x"
          << std::hex << fifoReadAddress[0] << " and writing it to 0x"
          << static_cast<void *>(hostbuf + words) << std::dec << std::endl;
      }

      // On failure mLength is still 0, so the caller never sees entries
      // that were not decoded.
      if (awsbwhal::unmgdPread(mUserHandle, static_cast<void *>(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0)
        return 0;

      size += chunkSizeBytes;
      words += wordsThisRead;
    }

    if (mLogStream.is_open()) {
      mLogStream << __func__ << ": done reading " << size << " bytes " << std::endl;
    }

    // All reads succeeded: publish the sample count.
    traceVector.mLength = numSamples;

    // ******************************
    // Read & process all trace FIFOs
    // ******************************
    for (uint32_t wordnum = 0; wordnum < numSamples; wordnum++) {
      const uint32_t index = wordsPerSample * wordnum;

      // Only the first two 32-bit words of each sample are decoded
      // (low word first).
      const uint64_t temp = static_cast<uint64_t>(hostbuf[index])
        | (static_cast<uint64_t>(hostbuf[index + 1]) << 32);
      if (!temp)
        continue;

      // Initialize result to 0
      xclTraceResults results;
      memset(&results, 0, sizeof(xclTraceResults));
      // SDSoC Packet Format
      results.Timestamp = temp & 0x1FFFFFFFFFFF;            // bits [44:0]
      results.EventType = ((temp >> 45) & 0xF) ? XCL_PERF_MON_END_EVENT :
        XCL_PERF_MON_START_EVENT;                           // bits [48:45]
      results.TraceID = (temp >> 49) & 0xFFF;               // bits [60:49]
      results.Reserved = (temp >> 61) & 0x1;
      results.Overflow = (temp >> 62) & 0x1;
      results.Error = (temp >> 63) & 0x1;
      results.EventID = XCL_PERF_MON_HW_EVENT;
      traceVector.mArray[wordnum] = results;

      if (mLogStream.is_open()) {
        mLogStream << "  Trace sample " << std::dec << wordnum << ": ";
        mLogStream << dec2bin(uint32_t(temp >> 32)) << " " << dec2bin(uint32_t(temp & 0xFFFFFFFF));
        mLogStream << std::endl;
        mLogStream << " Timestamp : " << results.Timestamp << "   ";
        mLogStream << "Event Type : " << results.EventType << "   ";
        mLogStream << "slotID : " << results.TraceID << "   ";
        mLogStream << "Start, Stop : " << static_cast<int>(results.Reserved) << "   ";
        mLogStream << "Overflow : " << static_cast<int>(results.Overflow) << "   ";
        mLogStream << "Error : " << static_cast<int>(results.Error) << "   ";
        mLogStream << std::endl;
      }
    }

    return size;
  } // end xclPerfMonReadTrace