in hardware/xilinx/src/vta.cc [517:563]
// Store module: executes one store instruction, copying data from the on-chip
// output buffer (out_mem) to external memory (outputs).
//
// Each invocation consumes exactly one instruction from store_queue,
// optionally consumes one incoming dependence token, performs insn.y_size
// memcpy transfers, and optionally emits one outgoing dependence token.
//
// Parameters:
//   outputs       - destination memory, indexed in bus_T units.
//   store_queue   - stream of raw instructions; one is read per call.
//   g2s_dep_queue - incoming dependence tokens; one is read (and discarded)
//                   when the instruction's pop_prev_dep flag is set.
//                   NOTE(review): "g2s" presumably means the token comes from
//                   the compute (GEMM) stage - confirm against the top level.
//   s2g_dep_queue - outgoing dependence tokens; a single `1` is written when
//                   the instruction's push_prev_dep flag is set.
//   out_mem       - source buffer; only read by this function.
void store(
volatile bus_T *outputs,
hls::stream<insn_T> &store_queue,
hls::stream<bool> &g2s_dep_queue,
hls::stream<bool> &s2g_dep_queue,
bus_T out_mem[VTA_ACC_BUFF_DEPTH][OUT_MAT_AXI_RATIO]) {
// Interface directives: outputs is an m_axi port (bundle data_port), the three
// streams are axis ports, out_mem is a bram port, and block-level control
// (port = return) is exposed over s_axilite on CONTROL_BUS.
#pragma HLS INTERFACE m_axi port = outputs offset = slave bundle = data_port
#pragma HLS INTERFACE axis port = store_queue
#pragma HLS INTERFACE axis port = g2s_dep_queue
#pragma HLS INTERFACE axis port = s2g_dep_queue
#pragma HLS INTERFACE bram port = out_mem
#pragma HLS INTERFACE s_axilite port = return bundle = CONTROL_BUS
// Bind out_mem to a RAM_1P (single-port RAM) core.
#pragma HLS RESOURCE variable = out_mem core = RAM_1P
// Pop the next store instruction from the queue.
insn_T raw_insn = store_queue.read();
// Reinterpret the raw instruction bits as a VTAMemInsn so its fields can be
// accessed by name. The cast is applied to a local copy, not to raw_insn.
// NOTE(review): relies on VTAMemInsn's layout matching the insn_T encoding -
// confirm against the struct definition.
insn_T raw_copy = raw_insn;
VTAMemInsn insn = *((VTAMemInsn *) &raw_copy);
// Consume one incoming dependence token if the instruction asks for it; the
// token's value is not used.
if (insn.pop_prev_dep) {
g2s_dep_queue.read();
}
// Starting positions: sram_idx indexes the first dimension of out_mem,
// dram_idx is scaled by OUT_MAT_AXI_RATIO to index outputs.
memop_sram_T sram_idx = insn.sram_base;
memop_dram_T dram_idx = insn.dram_base;
// One transfer per y step; the loop is marked for pipelining.
for (int y = 0; y < insn.y_size; y++) {
#pragma HLS PIPELINE
// Copy insn.x_size * VTA_OUT_ELEM_BYTES bytes from out_mem[sram_idx][0] to
// outputs[dram_idx * OUT_MAT_AXI_RATIO]. The const_cast strips `volatile`
// from the outputs pointer so it can be passed to memcpy.
memcpy(
const_cast<bus_T*>(&outputs[dram_idx * OUT_MAT_AXI_RATIO]),
(const bus_T*) &out_mem[sram_idx][0],
insn.x_size * VTA_OUT_ELEM_BYTES);
// Requests the Mul_LUT core for operations on sram_idx.
// NOTE(review): this directive sits mid-loop and sram_idx is only added to
// below - confirm it still has the intended effect.
#pragma HLS RESOURCE variable = sram_idx core = Mul_LUT
// Advance the source by the amount just copied (x_size) and the destination
// by the instruction's x_stride.
sram_idx += insn.x_size;
dram_idx += insn.x_stride;
}
// Emit one outgoing dependence token if the instruction asks for it.
if (insn.push_prev_dep) {
s2g_dep_queue.write(1);
}
}