in SDAccel/userspace/src/shim.cpp [723:1165]
/**
 * Write `length` bytes from `buffer` into PCIe BAR `bar_num` at byte `offset`.
 *
 * Only BAR0 (internal testing) / APP_PF_BAR0 is supported; any other BAR
 * returns -1. The bulk of the transfer is done as 32-bit stores; a trailing
 * remainder (< 4 bytes) is written byte-by-byte in the internal-testing
 * build, and is unsupported (logged) through fpga_pci_poke, which is
 * 32-bit only.
 *
 * @return 0 on success, -1 on bad BAR number or out-of-range offset.
 */
int AwsXcl::pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length) {
  char *qBuf = (char *)buffer;
  switch (bar_num) {
#ifdef INTERNAL_TESTING
    char *mem = 0;
    case 0:
    {
      // Reject writes that would run past the mmap'ed user BAR window.
      if ((length + offset) > MMAP_SIZE_USER) {
        return -1;
      }
      mem = mUserMap;
#else
    case APP_PF_BAR0:
    {
#endif
      break;
    }
    default:
    {
      return -1;
    }
  }
  // Fast path: 32-bit aligned chunks.
  while (length >= 4) {
#ifdef INTERNAL_TESTING
    *(unsigned *)(mem + offset) = *(unsigned *)qBuf;
#else
    fpga_pci_poke(ocl_kernel_bar, uint64_t (offset), *((uint32_t*) qBuf));
#endif
    offset += 4;
    qBuf += 4;
    length -= 4;
  }
  // Tail path: remaining 1-3 bytes.
  // BUGFIX: this guard was misspelled "INTERNEL_TESTING", so the
  // internal-testing build silently took the unsupported-length branch.
  while (length) {
#ifdef INTERNAL_TESTING
    *(mem + offset) = *qBuf;
#else
    std::cout << "xclWrite - unsupported write with length not multiple of 4-bytes" << std::endl;
#endif
    offset++;
    qBuf++;
    length--;
  }
  return 0;
}
/**
 * Intentionally a no-op on AWS F1: the DRAM is already cleared before a new
 * AFI is loaded, so there is nothing for the host shim to scrub. Returning
 * false makes it explicit to callers that this API does no work here.
 */
bool AwsXcl::zeroOutDDR()
{
  // Historical (non-F1) implementation kept for reference:
  //   static const unsigned long long BLOCK_SIZE = 0x4000000;
  //   void *buf = 0;
  //   if (posix_memalign(&buf, DDR_BUFFER_ALIGNMENT, BLOCK_SIZE))
  //     return false;
  //   memset(buf, 0, BLOCK_SIZE);
  //   mDataMover->pset64(buf, BLOCK_SIZE, 0, mDeviceInfo.mDDRSize/BLOCK_SIZE);
  //   free(buf);
  return false;
}
/*
 * Mark this FPGA slot as locked by the current process.
 *
 * NOTE(review): no OS-level lock (flock) is actually taken here; the method
 * only flips the local mLocked flag and reports success.
 */
bool AwsXcl::xclLockDevice()
{
#ifdef INTERNAL_TESTING
#else
  // FIXME: do we need to flock the ocl_kernel interface as well ?
#endif
  mLocked = true;
  // zeroOutDDR() is a no-op on F1, so its result is not consulted here.
  return true;
}
/**
 * Return the DSA (shell) name for this platform.
 *
 * The F1 shell identity is fixed, so deviceId/subsystemId are accepted for
 * interface compatibility but not consulted.
 */
std::string AwsXcl::getDSAName(unsigned short deviceId, unsigned short subsystemId)
{
  return std::string("xilinx_aws-vu9p-f1-04261818_dynamic_5_0");
}
/**
 * Populate `info` with device identity, link, clock and memory information.
 *
 * In the internal-testing build the data comes from the management driver
 * ioctl (AWSMGMT_IOCINFO); otherwise it is read through the AWS fpga_pci /
 * fpga_mgmt libraries. DDR size/bank count and sensor values are hard-coded
 * for the F1 platform.
 *
 * @return 0 on success, or the non-zero ioctl error code.
 */
int AwsXcl::xclGetDeviceInfo2(xclDeviceInfo2 *info)
{
  std::memset(info, 0, sizeof(xclDeviceInfo2));
  info->mMagic = 0X586C0C6C;
  info->mHALMajorVersion = XCLHAL_MAJOR_VER;
  // BUGFIX: this previously re-assigned mHALMajorVersion with the minor
  // version, leaving mHALMinorVersion zeroed and the major version wrong.
  info->mHALMinorVersion = XCLHAL_MINOR_VER;
  info->mMinTransferSize = DDR_BUFFER_ALIGNMENT;
  info->mDMAThreads = 4;//AWS has four threads. Others have only two threads
#ifdef INTERNAL_TESTING
  /* Sarab disabling xdma ioctl
  xdma_ioc_info obj = {{0X586C0C6C, XDMA_IOCINFO}};
  /--* Calling the underlying DMA driver to extract
   * DMA specific configuration
   * A non-zero value reprent at error
   *--/
  int ret = ioctl(mUserHandle, XDMA_IOCINFO, &obj);
  // Log the return value for further debug
  if (ret)
    return ret;
  info->mVendorId = obj.vendor;
  info->mDeviceId = obj.device;
  info->mSubsystemId = obj.subsystem_device;
  info->mSubsystemVendorId = obj.subsystem_vendor;
  info->mDeviceVersion = obj.subsystem_device & 0x00ff;
  */
  awsmgmt_ioc_info mgmt_info_obj;
  int ret = ioctl(mMgtHandle, AWSMGMT_IOCINFO, &mgmt_info_obj);
  if (ret)
    return ret;
  info->mVendorId = mgmt_info_obj.vendor;
  info->mDeviceId = mgmt_info_obj.device;
  info->mSubsystemId = mgmt_info_obj.subsystem_device;
  info->mSubsystemVendorId = mgmt_info_obj.subsystem_vendor;
  info->mDeviceVersion = mgmt_info_obj.subsystem_device & 0x00ff;
  info->mPCIeLinkWidth = mgmt_info_obj.pcie_link_width;
  info->mPCIeLinkSpeed = mgmt_info_obj.pcie_link_speed;
  for (int i = 0; i < AWSMGMT_NUM_SUPPORTED_CLOCKS; ++i) {
    info->mOCLFrequency[i] = mgmt_info_obj.ocl_frequency[i];
  }
  // MIG calibration must have succeeded on every DDR channel.
  info->mMigCalib = true;
  for (int i = 0; i < 4; i++) {
    info->mMigCalib = info->mMigCalib && mgmt_info_obj.mig_calibration[i];
  }
#else
  struct fpga_slot_spec slot_info;
  fpga_pci_get_slot_spec(mBoardNumber, &slot_info);
  info->mVendorId = slot_info.map[0].vendor_id;
  info->mDeviceId = slot_info.map[0].device_id;
  // FIXME - update next 3 variables
  info->mSubsystemId = slot_info.map[0].subsystem_device_id;
  info->mSubsystemVendorId = slot_info.map[0].subsystem_vendor_id;
  info->mDeviceVersion = 0;
  info->mPCIeLinkWidth = 16;
  info->mPCIeLinkSpeed = 8000;
  fpga_mgmt_image_info imageInfo;
  fpga_mgmt_describe_local_image( mBoardNumber, &imageInfo, 0 );
  // Clock metrics are reported in Hz; expose them in MHz.
  for (int i = 0; i < AWSMGMT_NUM_SUPPORTED_CLOCKS; ++i) {
    info->mOCLFrequency[i] = imageInfo.metrics.clocks[i].frequency[0] / 1000000;
  }
  info->mMigCalib = true;
#endif
  // F1 has 16 GiB per channel
  info->mDDRSize = 0x400000000 * 4;
  info->mDataAlignment = DDR_BUFFER_ALIGNMENT;
  info->mNumClocks = AWSMGMT_NUM_ACTUAL_CLOCKS;
  // Number of available channels
  // TODO: add support for other FPGA configurations with less
  // than 4 DRAM channels
  info->mDDRBankCount = 4;
  const std::string deviceName = getDSAName(info->mDeviceId, info->mSubsystemId);
  if (mLogStream.is_open())
    mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << deviceName << std::endl;
  // Bound the copy by the destination buffer so a long DSA name can never
  // overflow info->mName (previously unbounded).
  std::size_t length = deviceName.copy(info->mName, sizeof(info->mName) - 1, 0);
  info->mName[length] = '\0';
  if (mLogStream.is_open()) {
    mLogStream << __func__ << ": name=" << deviceName << ", version=0x" << std::hex << info->mDeviceVersion
               << ", clock freq=" << std::dec << info->mOCLFrequency[0]
               << ", clock freq 2=" << std::dec << info->mOCLFrequency[1] << std::endl;
  }
  // Static sensor placeholders; no live telemetry is read here.
  info->mOnChipTemp = 25;
  info->mFanTemp = 0;
  info->mVInt = 0.9;
  info->mVAux = 0.9;
  info->mVBram = 0.9;
  return 0;
}
/**
 * Reset the device (full or kernel region, per `kind`).
 *
 * Currently a stub on AWS F1 — no reset is issued and 0 is always returned.
 */
int AwsXcl::resetDevice(xclResetKind kind) {
  // AWS FIXME - add reset. A future IOCTL should reset just the OCL region.
  // Legacy XDMA implementation, kept for reference:
  //   if (kind == XCL_RESET_FULL) {
  //     xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCHOTRESET};
  //     return ioctl(mUserHandle, XDMA_IOCHOTRESET, &obj);
  //   }
  //   else if (kind == XCL_RESET_KERNEL) {
  //     xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCOCLRESET};
  //     return ioctl(mUserHandle, XDMA_IOCOCLRESET, &obj);
  //   }
  //   return -EINVAL;
  return 0;
}
/**
 * Request new kernel clock frequencies for `region`.
 *
 * Internal-testing builds forward the request to the management driver;
 * on real AWS F1 the clocks are fixed per-AFI, so the call is a no-op
 * that reports success.
 */
int AwsXcl::xclReClock2(unsigned short region, const unsigned short *targetFreqMHz)
{
#ifdef INTERNAL_TESTING
  awsmgmt_ioc_freqscaling obj = {0, targetFreqMHz[0], targetFreqMHz[1], targetFreqMHz[2], 0};
  return ioctl(mMgtHandle, AWSMGMT_IOCFREQSCALING, &obj);
#else
  // Frequency scaling is not supported on AWS; frequencies are baked into the AFI.
  return 0;
#endif
}
/**
 * Unmanaged write: push `count` bytes from `buf` to device address `offset`
 * via the driver's PWRITE_UNMGD ioctl. `flags` has no defined values yet.
 *
 * @return the ioctl result, or -EINVAL if any flag bit is set.
 */
ssize_t AwsXcl::xclUnmgdPwrite(unsigned flags, const void *buf, size_t count, uint64_t offset)
{
  if (flags != 0) {
    return -EINVAL;
  }
  drm_xocl_pwrite_unmgd req = {0, 0, offset, count, reinterpret_cast<uint64_t>(buf)};
  return ioctl(mUserHandle, DRM_IOCTL_XOCL_PWRITE_UNMGD, &req);
}
/**
 * Unmanaged read: pull `count` bytes from device address `offset` into `buf`
 * via the driver's PREAD_UNMGD ioctl. `flags` has no defined values yet.
 *
 * @return the ioctl result, or -EINVAL if any flag bit is set.
 */
ssize_t AwsXcl::xclUnmgdPread(unsigned flags, void *buf, size_t count, uint64_t offset)
{
  if (flags != 0) {
    return -EINVAL;
  }
  drm_xocl_pread_unmgd req = {0, 0, offset, count, reinterpret_cast<uint64_t>(buf)};
  return ioctl(mUserHandle, DRM_IOCTL_XOCL_PREAD_UNMGD, &req);
}
int AwsXcl::xclExportBO(unsigned int boHandle)
{
drm_prime_handle info = {boHandle, 0, -1};
int result = ioctl(mUserHandle, DRM_IOCTL_PRIME_HANDLE_TO_FD, &info);
return !result ? info.fd : result;
}
unsigned int AwsXcl::xclImportBO(int fd, unsigned flags)
{
/*Sarab
drm_xocl_userptr_bo user = {reinterpret_cast<uint64_t>(userptr), size, mNullBO, flags};
int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_USERPTR_BO, &user);
*/
drm_prime_handle info = {mNullBO, flags, fd};
int result = ioctl(mUserHandle, DRM_IOCTL_PRIME_FD_TO_HANDLE, &info);
if (result) {
std::cout << __func__ << " ERROR: FD to handle IOCTL failed" << std::endl;
}
return !result ? info.handle : mNullBO;
}
int AwsXcl::xclGetBOProperties(unsigned int boHandle, xclBOProperties *properties)
{
drm_xocl_info_bo info = {boHandle, 0, 0, 0};
int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &info);
properties->handle = info.handle;
properties->flags = info.flags;
properties->size = info.size;
properties->paddr = info.paddr;
properties->domain = XCL_BO_DEVICE_RAM; // currently all BO domains are XCL_BO_DEVICE_RAM
return result ? mNullBO : 0;
}
/**
 * Release the advisory lock on the user device handle and clear the
 * local locked flag. Always reports success.
 */
bool AwsXcl::xclUnlockDevice()
{
  flock(mUserHandle, LOCK_UN);
  mLocked = false;
  return true;
}
// Assume that the memory is always
// created for the device ddr for now. Ignoring the flags as well.
unsigned int AwsXcl::xclAllocBO(size_t size, xclBOKind domain, unsigned flags)
{
drm_xocl_create_bo info = {size, mNullBO, flags};
int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_CREATE_BO, &info);
if (result) {
std::cout << __func__ << " ERROR: AllocBO IOCTL failed" << std::endl;
}
return result ? mNullBO : info.handle;
}
unsigned int AwsXcl::xclAllocUserPtrBO(void *userptr, size_t size, unsigned flags)
{
drm_xocl_userptr_bo user = {reinterpret_cast<uint64_t>(userptr), size, mNullBO, flags};
int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_USERPTR_BO, &user);
return result ? mNullBO : user.handle;
}
void AwsXcl::xclFreeBO(unsigned int boHandle)
{
drm_gem_close closeInfo = {boHandle, 0};
ioctl(mUserHandle, DRM_IOCTL_GEM_CLOSE, &closeInfo);
}
int AwsXcl::xclWriteBO(unsigned int boHandle, const void *src, size_t size, size_t seek)
{
drm_xocl_pwrite_bo pwriteInfo = { boHandle, 0, seek, size, reinterpret_cast<uint64_t>(src) };
return ioctl(mUserHandle, DRM_IOCTL_XOCL_PWRITE_BO, &pwriteInfo);
}
int AwsXcl::xclReadBO(unsigned int boHandle, void *dst, size_t size, size_t skip)
{
drm_xocl_pread_bo preadInfo = { boHandle, 0, skip, size, reinterpret_cast<uint64_t>(dst) };
return ioctl(mUserHandle, DRM_IOCTL_XOCL_PREAD_BO, &preadInfo);
}
void *AwsXcl::xclMapBO(unsigned int boHandle, bool write)
{
drm_xocl_info_bo info = { boHandle, 0, 0 };
int result = ioctl(mUserHandle, DRM_IOCTL_XOCL_INFO_BO, &info);
if (result)
return nullptr;
drm_xocl_map_bo mapInfo = { boHandle, 0, 0 };
result = ioctl(mUserHandle, DRM_IOCTL_XOCL_MAP_BO, &mapInfo);
if (result)
return nullptr;
return mmap(0, info.size, (write ? (PROT_READ|PROT_WRITE) : PROT_READ),
MAP_SHARED, mUserHandle, mapInfo.offset);
}
int AwsXcl::xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir,
size_t size, size_t offset)
{
drm_xocl_sync_bo_dir drm_dir = (dir == XCL_BO_SYNC_BO_TO_DEVICE) ?
DRM_XOCL_SYNC_BO_TO_DEVICE :
DRM_XOCL_SYNC_BO_FROM_DEVICE;
drm_xocl_sync_bo syncInfo = {boHandle, 0, size, offset, drm_dir};
return ioctl(mUserHandle, DRM_IOCTL_XOCL_SYNC_BO, &syncInfo);
}
#ifndef INTERNAL_TESTING
/**
 * If the slot still exposes the "cleared" user-PF device id, load the
 * default global AFI, wait for it to become active, and rescan the slot's
 * application PFs so the new ids are visible.
 *
 * @return 0 on success (or when no load was needed), -1 if the AFI did not
 *         come up before the timeout.
 */
int AwsXcl::loadDefaultAfiIfCleared( void )
{
  int array_len = 16;
  fpga_slot_spec spec_array[ array_len ];
  // BUGFIX: previously memset's fill byte was mBoardNumber, which filled the
  // array with garbage instead of zeroing it before the library populates it.
  std::memset( spec_array, 0, sizeof(fpga_slot_spec) * array_len );
  fpga_pci_get_all_slot_specs( spec_array, array_len );
  if( spec_array[mBoardNumber].map[FPGA_APP_PF].device_id == AWS_UserPF_DEVICE_ID ) {
    std::string agfi = DEFAULT_GLOBAL_AFI;
    fpga_mgmt_load_local_image( mBoardNumber, const_cast<char*>(agfi.c_str()) );
    if( sleepUntilLoaded( agfi ) ) {
      std::cout << "ERROR: Sleep until load failed." << std::endl;
      return -1;
    }
    fpga_pci_rescan_slot_app_pfs( mBoardNumber );
  }
  return 0;
}
/**
 * Poll the management library until the AFI identified by `afi` reports
 * FPGA_STATUS_LOADED, with a growing back-off (5s, 6s, ... up to 20 polls).
 *
 * @return 0 when the expected AFI is loaded, 1 on describe failure,
 *         timeout, or a mismatched AFI id.
 */
int AwsXcl::sleepUntilLoaded( const std::string afi )
{
  fpga_mgmt_image_info info;
  int max_retries = 20;
  int seconds_to_wait = 5;
  for( int i = 0; i < max_retries; i++ ) {
    // Back off before each status check; the wait grows by 1s per retry.
    std::this_thread::sleep_for( std::chrono::seconds( seconds_to_wait ) );
    std::memset( &info, 0, sizeof(struct fpga_mgmt_image_info) );
    // Get describe result in the info object
    int result = fpga_mgmt_describe_local_image( mBoardNumber, &info, 0 );
    if( result ) {
      std::cout << "ERROR: Load image failed." << std::endl;
      return 1;
    }
    // Note: c_str() is already const char*, so no const_cast is needed here.
    if( (info.status == FPGA_STATUS_LOADED) && !std::strcmp(info.ids.afi_id, afi.c_str()) ) {
      break;
    }
    // Increment wait time
    seconds_to_wait++;
  }
  // If after the timeout we check once more if our status is LOADED
  if( info.status != FPGA_STATUS_LOADED ) {
    std::cout << "ERROR: Load image failed after waiting till timeout." << std::endl;
    return 1;
  }
  // After the AFI is loaded, we check again if the AFI ID is the correct one
  if( std::strcmp(info.ids.afi_id, afi.c_str()) ) {
    std::cout << "ERROR: AFI loaded is not the one we are waiting on." << std::endl;
    return 1;
  }
  // If we have reached here, things look good
  return 0;
}
/**
 * If `afi_id` is already loaded on the slot, skip the reload and instead
 * pulse virtual DIP switch bit 0 (set, wait 250us, clear, wait 250us) to
 * signal the design, then report success.
 *
 * @return 0 when the reload was skipped successfully, 1 when the AFI is not
 *         yet loaded (caller should download), or a non-zero vDIP error code.
 */
int AwsXcl::checkAndSkipReload( char *afi_id, fpga_mgmt_image_info *orig_info )
{
  const bool alreadyLoaded =
      (orig_info->status == FPGA_STATUS_LOADED) &&
      (std::strcmp(orig_info->ids.afi_id, afi_id) == 0);
  if( !alreadyLoaded ) {
    std::cout << "AFI not yet loaded, proceed to download." << std::endl;
    return 1;
  }
  std::cout << "This AFI already loaded. Skip reload!" << std::endl;
  uint16_t dipState = 0;
  int rc = fpga_mgmt_get_vDIP_status(mBoardNumber, &dipState);
  if( rc ) {
    printf("Error: can not get virtual DIP Switch state\n");
    return rc;
  }
  // Raise bit 0.
  dipState |= (1 << 0);
  rc = fpga_mgmt_set_vDIP(mBoardNumber, dipState);
  if( rc ) {
    printf("Error trying to set virtual DIP Switch \n");
    return rc;
  }
  std::this_thread::sleep_for(std::chrono::microseconds(250));
  // Re-read the current state before clearing the bit.
  rc = fpga_mgmt_get_vDIP_status(mBoardNumber, &dipState);
  if( rc ) {
    printf("Error: can not get virtual DIP Switch state\n");
    return rc;
  }
  // Drop bit 0 back to zero, completing the pulse.
  dipState &= ~(1 << 0);
  rc = fpga_mgmt_set_vDIP(mBoardNumber, dipState);
  if( rc ) {
    printf("Error trying to set virtual DIP Switch \n");
    return rc;
  }
  std::this_thread::sleep_for(std::chrono::microseconds(250));
  printf("Successfully skipped reloading of local image.\n");
  return rc;
}
#endif
} /* end namespace awsbmhal */