in hardware/xilinx/src/vta.cc [169:234]
void load(
volatile bus_T *inputs,
volatile bus_T *weights,
hls::stream<insn_T> &load_queue,
hls::stream<bool> &g2l_dep_queue,
hls::stream<bool> &l2g_dep_queue,
bus_T inp_mem[VTA_INP_BUFF_DEPTH][INP_MAT_AXI_RATIO],
bus_T wgt_mem[VTA_WGT_BUFF_DEPTH][WGT_MAT_AXI_RATIO]) {
#pragma HLS INTERFACE m_axi port = inputs offset = slave bundle = data_port
#pragma HLS INTERFACE m_axi port = weights offset = slave bundle = data_port
#pragma HLS INTERFACE axis port = load_queue
#pragma HLS INTERFACE axis port = g2l_dep_queue
#pragma HLS INTERFACE axis port = l2g_dep_queue
#pragma HLS INTERFACE bram port = wgt_mem
#pragma HLS INTERFACE bram port = inp_mem
#pragma HLS INTERFACE s_axilite port = return bundle = CONTROL_BUS
#pragma HLS RESOURCE variable = inp_mem core = RAM_1P
#pragma HLS RESOURCE variable = wgt_mem core = RAM_1P
// Pop load instruction
insn_T raw_insn = load_queue.read();
// Cast to MemInsn
insn_T raw_copy = raw_insn;
VTAMemInsn insn = *((VTAMemInsn *) &raw_copy);
// Pop dependence token if instructed
if (insn.pop_next_dep) {
g2l_dep_queue.read();
}
// Pre-processing
memop_sram_T x_width = (insn.x_pad_0 + insn.x_size + insn.x_pad_1);
memop_sram_T y_offset_0 = x_width * insn.y_pad_0;
#pragma HLS RESOURCE variable = y_offset_0 core = Mul_LUT latency = 4
memop_sram_T y_offset_1 = x_width * insn.y_pad_1;
#pragma HLS RESOURCE variable = y_offset_1 core = Mul_LUT latency = 4
if (insn.memory_type == VTA_MEM_ID_INP) {
load_pad_2d<bus_T, INP_MAT_AXI_RATIO, VTA_INP_ELEM_BYTES>(
inputs,
inp_mem,
insn.sram_base,
insn.dram_base,
insn.y_size,
insn.x_size,
insn.x_stride,
insn.x_pad_0,
insn.x_pad_1,
y_offset_0,
y_offset_1);
} else if (insn.memory_type == VTA_MEM_ID_WGT) {
load_2d<bus_T, WGT_MAT_AXI_RATIO, VTA_WGT_ELEM_BYTES>(
weights,
wgt_mem,
insn.sram_base,
insn.dram_base,
insn.y_size,
insn.x_size,
insn.x_stride);
}
// Push dependence token if instructed
if (insn.push_next_dep) {
l2g_dep_queue.write(1);
}
}