in src/pynq/pynq_driver.cc [113:143]
// Program the VTA modules, start them, and poll for completion.
//
// insn_phy_addr: value written to the fetch module's instruction-address
//                register (VTA_FETCH_INSN_ADDR_OFFSET).
// insn_count:    value written to the fetch module's instruction-count
//                register (VTA_FETCH_INSN_COUNT_OFFSET).
// wait_cycles:   maximum number of times the compute module's done register
//                is polled before giving up.
//
// Returns 0 if VTA_DONE was read within wait_cycles polls, 1 on timeout.
// When wait_cycles is 0 the done register is never read and 1 is returned.
int Run(vta_phy_addr_t insn_phy_addr,
        uint32_t insn_count,
        uint32_t wait_cycles) {
  // Instruction stream parameters for the fetch module.
  VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_COUNT_OFFSET, insn_count);
  VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy_addr);
  // The address registers of the load, compute and store modules are all
  // written with 0.
  VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_INP_ADDR_OFFSET, 0);
  VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_WGT_ADDR_OFFSET, 0);
  VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_UOP_ADDR_OFFSET, 0);
  VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_BIAS_ADDR_OFFSET, 0);
  VTAWriteMappedReg(vta_store_handle_, VTA_STORE_OUT_ADDR_OFFSET, 0);

  // VTA start: fetch receives VTA_START, the other modules VTA_AUTORESTART.
  // NOTE(review): offset 0x0 is presumably each module's control register --
  // confirm against the hardware register map.
  VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START);
  VTAWriteMappedReg(vta_load_handle_, 0x0, VTA_AUTORESTART);
  VTAWriteMappedReg(vta_compute_handle_, 0x0, VTA_AUTORESTART);
  VTAWriteMappedReg(vta_store_handle_, 0x0, VTA_AUTORESTART);

  // Allow device to respond: sleep for 1000 ns before the first poll.
  // Member assignment is used instead of C99-style designated initializers,
  // which are not standard C++ before C++20.
  struct timespec ts = {};
  ts.tv_sec = 0;
  ts.tv_nsec = 1000;
  // The remaining time is not needed, so no remainder buffer is passed.
  nanosleep(&ts, nullptr);

  // Loop until the VTA is done or the poll budget is exhausted.
  unsigned t;
  for (t = 0; t < wait_cycles; ++t) {
    const unsigned flag =
        VTAReadMappedReg(vta_compute_handle_, VTA_COMPUTE_DONE_RD_OFFSET);
    if (flag == VTA_DONE) break;
    // Give other threads a chance to run between polls.
    std::this_thread::yield();
  }

  // Leaving the loop via break keeps t below wait_cycles; reaching
  // wait_cycles means no poll observed VTA_DONE, which is reported as 1.
  return t < wait_cycles ? 0 : 1;
}