in src/sim/sim_driver.cc [186:226]
/*!
 * \brief Execute a load that widens 8-bit DRAM values into this 32-bit SRAM.
 *
 * Every source byte is read as a signed 8-bit integer and written, sign-extended,
 * as one 32-bit value in SRAM. The padding regions described by the instruction
 * (y_pad_0 / y_pad_1 rows, x_pad_0 / x_pad_1 elements per row) are zero-filled.
 *
 * \param op The memory instruction supplying base addresses, sizes, stride and padding.
 * \param dram The DRAM model used to translate the DRAM base into a readable pointer.
 * \param load_counter Counter whose entry 0 is increased by x_size * y_size * kElemBytes,
 *        i.e. it is accounted in SRAM-element bytes, not in the narrower DRAM bytes.
 * \param skip_exec When true only the counter is updated; no data is moved or cleared.
 */
void Load_int8(const VTAMemInsn* op,
               DRAM* dram,
               uint64_t* load_counter,
               bool skip_exec) {
  CHECK_EQ(kBits, VTA_ACC_WIDTH);
  // TODO(zhanghao): extend to other width
  CHECK_EQ(VTA_ACC_WIDTH, 32);
  CHECK_EQ(VTA_INP_WIDTH, 8);
  // How many input-width values fit in one accumulator-width value.
  const int factor = VTA_ACC_WIDTH / VTA_INP_WIDTH;

  // Widen to 64 bits before multiplying so the product is never evaluated in a
  // narrower promoted type than the counter it is added to.
  load_counter[0] += static_cast<uint64_t>(op->x_size) * op->y_size * kElemBytes;
  if (skip_exec) return;

  DType* sram_ptr = data_ + op->sram_base;
  // The DRAM side stores 1/factor of the bytes of an SRAM element, so the base
  // offset is scaled down accordingly.
  const int8_t* dram_ptr = static_cast<const int8_t*>(dram->GetAddr(
      op->dram_base * kElemBytes / factor));

  const uint64_t xtotal = op->x_size + op->x_pad_0 + op->x_pad_1;
  const uint32_t ytotal = op->y_size + op->y_pad_0 + op->y_pad_1;
  const uint64_t sram_end = op->sram_base + xtotal * ytotal;
  // The padded tile must fit inside the SRAM before anything is written.
  CHECK_LE(sram_end, kMaxNumElem);

  // Loop-invariant quantities, computed once in 64 bits.
  // NOTE(review): the per-row value count presumes one SRAM element holds
  // VTA_BATCH * VTA_BLOCK_OUT 32-bit values - confirm against the SRAM instantiation.
  const uint64_t row_values = static_cast<uint64_t>(op->x_size) * VTA_BATCH * VTA_BLOCK_OUT;
  // dram one element is 1 bytes rather than 4 bytes
  const uint64_t dram_row_bytes = static_cast<uint64_t>(kElemBytes / factor) * op->x_stride;

  // Leading padding rows.
  memset(sram_ptr, 0, kElemBytes * xtotal * op->y_pad_0);
  sram_ptr += xtotal * op->y_pad_0;

  for (uint32_t y = 0; y < op->y_size; ++y) {
    // Left padding of this row.
    memset(sram_ptr, 0, kElemBytes * op->x_pad_0);
    sram_ptr += op->x_pad_0;

    // Payload: sign-extend each int8 from DRAM into an int32 slot in SRAM.
    int32_t* sram_values = reinterpret_cast<int32_t*>(sram_ptr);
    for (uint64_t i = 0; i < row_values; ++i) {
      sram_values[i] = static_cast<int32_t>(dram_ptr[i]);
    }
    sram_ptr += op->x_size;

    // Right padding of this row.
    memset(sram_ptr, 0, kElemBytes * op->x_pad_1);
    sram_ptr += op->x_pad_1;

    dram_ptr += dram_row_bytes;
  }

  // Trailing padding rows.
  memset(sram_ptr, 0, kElemBytes * xtotal * op->y_pad_1);
}