config/pkg_config.py (215 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""VTA Package configuration module

This module is dependency free and can be used to configure package.
"""
from __future__ import absolute_import as _abs

import json
import glob
import os


def get_vta_hw_path():
    """Get the VTA HW path.

    Returns
    -------
    path : str
        The value of the ``VTA_HW_PATH`` environment variable if it is set,
        otherwise the absolute path of the parent of this file's directory.
    """
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    vta_hw_default = os.path.abspath(os.path.join(curr_path, ".."))
    return os.getenv('VTA_HW_PATH', vta_hw_default)


def get_tvm_path():
    """Get the TVM path.

    Returns
    -------
    path : str
        The value of the ``TVM_PATH`` environment variable if it is set,
        otherwise the absolute path three levels above this file's directory.
    """
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    tvm_default = os.path.abspath(os.path.join(curr_path, "../../.."))
    return os.getenv('TVM_PATH', tvm_default)


def _sram_layout(elem_bus_width, size_bytes, max_bus_width, axi_bus_width):
    """Compute (banks, width, depth, axi_ratio) for one on-chip memory.

    Parameters
    ----------
    elem_bus_width : int
        Width in bits of one memory element.
    size_bytes : int
        Total size of the memory in bytes.
    max_bus_width : int
        Widest single memory, in bits, allowed by the FPGA toolchain.
    axi_bus_width : int
        Width in bits of the design's AXI memory interface.

    Returns
    -------
    layout : tuple of int
        ``(banks, width, depth, axi_ratio)``.
    """
    # Ceiling division: number of banks needed to cover one element.
    banks = (elem_bus_width + max_bus_width - 1) // max_bus_width
    width = min(elem_bus_width, max_bus_width)
    depth = size_bytes * 8 // elem_bus_width
    axi_ratio = width // axi_bus_width
    return banks, width, depth, axi_ratio


class PkgConfig(object):
    """Simple package config tool for VTA.

    This is used to provide runtime specific configurations.

    Parameters
    ----------
    cfg : dict
        The config dictionary. It must contain every key listed in
        ``cfg_keys``. Note that the constructor adds the derived keys
        ``LOG_BLOCK_IN``, ``LOG_BLOCK_OUT``, ``LOG_OUT_WIDTH`` and
        ``LOG_OUT_BUFF_SIZE`` to this dictionary in place.
    """
    cfg_keys = [
        "TARGET",
        "LOG_INP_WIDTH",
        "LOG_WGT_WIDTH",
        "LOG_ACC_WIDTH",
        "LOG_BATCH",
        "LOG_BLOCK",
        "LOG_UOP_BUFF_SIZE",
        "LOG_INP_BUFF_SIZE",
        "LOG_WGT_BUFF_SIZE",
        "LOG_ACC_BUFF_SIZE",
    ]

    def __init__(self, cfg):
        # Derived parameters (written back into the caller's dict on purpose:
        # cfg_dict / cfg_json / same_config all include them).
        cfg["LOG_BLOCK_IN"] = cfg["LOG_BLOCK"]
        cfg["LOG_BLOCK_OUT"] = cfg["LOG_BLOCK"]
        cfg["LOG_OUT_WIDTH"] = cfg["LOG_INP_WIDTH"]
        cfg["LOG_OUT_BUFF_SIZE"] = (
            cfg["LOG_ACC_BUFF_SIZE"] +
            cfg["LOG_OUT_WIDTH"] -
            cfg["LOG_ACC_WIDTH"])

        # Update cfg now that we've extended it
        self.__dict__.update(cfg)

        # VTA_HW path and TVM_PATH
        vta_hw_path = get_vta_hw_path()
        tvm_path = get_tvm_path()

        # Boards that use the pynq driver stack (see pynq.io)
        uses_pynq_stack = self.TARGET in ["pynq", "ultra96", "zcu104"]

        # Include path
        self.include_path = [
            "-I%s/include" % tvm_path,
            "-I%s/include" % vta_hw_path,
            "-I%s/3rdparty/dlpack/include" % tvm_path,
            "-I%s/3rdparty/dmlc-core/include" % tvm_path
        ]

        # List of source files that can be used to build standalone library.
        self.lib_source = []
        self.lib_source += glob.glob("%s/src/*.cc" % vta_hw_path)
        if uses_pynq_stack:
            self.lib_source += glob.glob("%s/src/pynq/*.cc" % vta_hw_path)
        elif self.TARGET in ["de10nano"]:
            self.lib_source += glob.glob("%s/src/de10nano/*.cc" % vta_hw_path)
            self.include_path += [
                "-I%s/src/de10nano" % vta_hw_path,
                "-I%s/3rdparty" % tvm_path
            ]

        # Linker flags
        if uses_pynq_stack:
            self.ldflags = [
                "-L/usr/lib",
                "-l:libcma.so"]
        else:
            self.ldflags = []

        # Derive bitstream config string.
        self.bitstream = "{}x{}_i{}w{}a{}_{}_{}_{}_{}".format(
            (1 << cfg["LOG_BATCH"]),
            (1 << cfg["LOG_BLOCK"]),
            (1 << cfg["LOG_INP_WIDTH"]),
            (1 << cfg["LOG_WGT_WIDTH"]),
            (1 << cfg["LOG_ACC_WIDTH"]),
            cfg["LOG_UOP_BUFF_SIZE"],
            cfg["LOG_INP_BUFF_SIZE"],
            cfg["LOG_WGT_BUFF_SIZE"],
            cfg["LOG_ACC_BUFF_SIZE"])

        # Derive FPGA parameters from target
        #   - device:           part number
        #   - family:           fpga family
        #   - freq:             PLL frequency
        #   - per:              clock period to achieve in HLS
        #                       (how aggressively design is pipelined)
        #   - axi_bus_width:    axi bus width used for DMA transactions
        #                       (property of FPGA memory interface)
        #   - axi_cache_bits:   ARCACHE/AWCACHE signals for the AXI bus
        #                       (e.g. 1111 is write-back read and write allocate)
        #   - axi_prot_bits:    ARPROT/AWPROT signals for the AXI bus
        if self.TARGET == "de10nano":
            self.fpga_device = "5CSEBA6U23I7"
            self.fpga_family = "Cyclone\\ V"
            # Every other target defines these two attributes; define them
            # here as well so reading them never raises AttributeError.
            self.fpga_board = None
            self.fpga_board_rev = None
            # TODO: The following parameters have not been propagated into
            # current Chisel-based implementation of VTA hardware for DE10-Nano.
            # A future change should be made to propagate these parameters,
            # in order to avoid duplicated definition.
            self.fpga_freq = 100
            self.fpga_per = 2
            self.fpga_log_axi_bus_width = 6
            self.axi_prot_bits = '100'
            # IP register address map
            self.ip_reg_map_range = "0x1000"
            self.fetch_base_addr = "0xFF220000"
            self.load_base_addr = "0xFF221000"
            self.compute_base_addr = "0xFF222000"
            self.store_base_addr = "0xFF223000"
        elif self.TARGET == "ultra96":
            self.fpga_device = "xczu3eg-sbva484-1-e"
            self.fpga_family = "zynq-ultrascale+"
            self.fpga_board = None
            self.fpga_board_rev = None
            self.fpga_freq = 333
            self.fpga_per = 2
            self.fpga_log_axi_bus_width = 7
            self.axi_prot_bits = '010'
            # IP register address map
            self.ip_reg_map_range = "0x1000"
            self.fetch_base_addr = "0xA0000000"
            self.load_base_addr = "0xA0001000"
            self.compute_base_addr = "0xA0002000"
            self.store_base_addr = "0xA0003000"
        elif self.TARGET == "zcu104":
            self.fpga_device = "xczu7ev-ffvc1156-2-e"
            self.fpga_family = "zynq-ultrascale+"
            self.fpga_board = "xilinx.com:zcu104:part0"
            self.fpga_board_rev = "1.1"
            self.fpga_freq = 333
            self.fpga_per = 2
            self.fpga_log_axi_bus_width = 7
            self.axi_prot_bits = '010'
            # IP register address map
            self.ip_reg_map_range = "0x1000"
            self.fetch_base_addr = "0xA0000000"
            self.load_base_addr = "0xA0001000"
            self.compute_base_addr = "0xA0002000"
            self.store_base_addr = "0xA0003000"
        else:
            # By default, we use the pynq parameters
            self.fpga_device = "xc7z020clg484-1"
            self.fpga_family = "zynq-7000"
            self.fpga_board = None
            self.fpga_board_rev = None
            self.fpga_freq = 100
            self.fpga_per = 7
            self.fpga_log_axi_bus_width = 6
            self.axi_prot_bits = '000'
            # IP register address map
            self.ip_reg_map_range = "0x1000"
            self.fetch_base_addr = "0x43C00000"
            self.load_base_addr = "0x43C01000"
            self.compute_base_addr = "0x43C02000"
            self.store_base_addr = "0x43C03000"

        # Set coherence settings
        coherent = True
        if coherent:
            self.axi_cache_bits = '1111'
            self.coherent = True

        # Define IP memory mapped registers offsets.
        # In HLS 0x00-0x0C is reserved for block-level I/O protocol.
        # Make sure to leave 8B between register offsets to maintain
        # compatibility with 64bit systems.
        self.fetch_insn_count_offset = 0x10
        self.fetch_insn_addr_offset = self.fetch_insn_count_offset + 0x08
        self.load_inp_addr_offset = 0x10
        self.load_wgt_addr_offset = self.load_inp_addr_offset + 0x08
        self.compute_done_wr_offset = 0x10
        self.compute_done_rd_offset = self.compute_done_wr_offset + 0x08
        self.compute_uop_addr_offset = self.compute_done_rd_offset + 0x08
        self.compute_bias_addr_offset = self.compute_uop_addr_offset + 0x08
        self.store_out_addr_offset = 0x10

        # Derive SRAM parameters
        # The goal here is to determine how many memory banks are needed,
        # how deep and wide each bank needs to be. This is derived from
        # the size of each memory element (result of data width, and tensor shape),
        # and also how wide a memory can be as permitted by the FPGA tools.
        #
        # The mem axi ratio is a parameter used by HLS to resize memories
        # so memory read/write ports are the same size as the design axi bus width.
        #
        # Max bus width allowed (property of FPGA vendor toolchain)
        max_bus_width = 1024
        # Bus width of a memory interface
        mem_bus_width = 1 << self.fpga_log_axi_bus_width

        # Input memory
        inp_mem_bus_width = 1 << (cfg["LOG_INP_WIDTH"] +
                                  cfg["LOG_BATCH"] +
                                  cfg["LOG_BLOCK_IN"])
        self.inp_mem_size = 1 << cfg["LOG_INP_BUFF_SIZE"]  # bytes
        (self.inp_mem_banks,
         self.inp_mem_width,
         self.inp_mem_depth,
         self.inp_mem_axi_ratio) = _sram_layout(
             inp_mem_bus_width, self.inp_mem_size, max_bus_width, mem_bus_width)

        # Weight memory
        wgt_mem_bus_width = 1 << (cfg["LOG_WGT_WIDTH"] +
                                  cfg["LOG_BLOCK_IN"] +
                                  cfg["LOG_BLOCK_OUT"])
        self.wgt_mem_size = 1 << cfg["LOG_WGT_BUFF_SIZE"]  # bytes
        (self.wgt_mem_banks,
         self.wgt_mem_width,
         self.wgt_mem_depth,
         self.wgt_mem_axi_ratio) = _sram_layout(
             wgt_mem_bus_width, self.wgt_mem_size, max_bus_width, mem_bus_width)

        # Output memory
        out_mem_bus_width = 1 << (cfg["LOG_OUT_WIDTH"] +
                                  cfg["LOG_BATCH"] +
                                  cfg["LOG_BLOCK_OUT"])
        self.out_mem_size = 1 << cfg["LOG_OUT_BUFF_SIZE"]  # bytes
        (self.out_mem_banks,
         self.out_mem_width,
         self.out_mem_depth,
         self.out_mem_axi_ratio) = _sram_layout(
             out_mem_bus_width, self.out_mem_size, max_bus_width, mem_bus_width)

        # Macro defs: one -DVTA_<KEY>=<value> per config entry (derived
        # entries included), in the dictionary's iteration order.
        self.cfg_dict = dict(cfg)
        self.macro_defs = [
            "-DVTA_%s=%s" % (key, str(value)) for key, value in cfg.items()
        ]
        driver_macros = [
            ("LOG_BUS_WIDTH", self.fpga_log_axi_bus_width),
            # Macros used by the VTA driver
            ("IP_REG_MAP_RANGE", self.ip_reg_map_range),
            ("FETCH_ADDR", self.fetch_base_addr),
            ("LOAD_ADDR", self.load_base_addr),
            ("COMPUTE_ADDR", self.compute_base_addr),
            ("STORE_ADDR", self.store_base_addr),
            # IP register offsets
            ("FETCH_INSN_COUNT_OFFSET", self.fetch_insn_count_offset),
            ("FETCH_INSN_ADDR_OFFSET", self.fetch_insn_addr_offset),
            ("LOAD_INP_ADDR_OFFSET", self.load_inp_addr_offset),
            ("LOAD_WGT_ADDR_OFFSET", self.load_wgt_addr_offset),
            ("COMPUTE_DONE_WR_OFFSET", self.compute_done_wr_offset),
            ("COMPUTE_DONE_RD_OFFSET", self.compute_done_rd_offset),
            ("COMPUTE_UOP_ADDR_OFFSET", self.compute_uop_addr_offset),
            ("COMPUTE_BIAS_ADDR_OFFSET", self.compute_bias_addr_offset),
            ("STORE_OUT_ADDR_OFFSET", self.store_out_addr_offset),
        ]
        for macro_name, macro_value in driver_macros:
            self.macro_defs.append("-DVTA_%s=%s" % (macro_name, macro_value))
        # Coherency
        if coherent:
            self.macro_defs.append("-DVTA_COHERENT_ACCESSES=true")
        else:
            self.macro_defs.append("-DVTA_COHERENT_ACCESSES=false")

    @property
    def cflags(self):
        """Compiler flags: the include path entries followed by the macro definitions."""
        return self.include_path + self.macro_defs

    @property
    def cfg_json(self):
        """The stored configuration dictionary serialized as indented JSON."""
        return json.dumps(self.cfg_dict, indent=2)

    def same_config(self, cfg):
        """Compare if cfg is same as current config.

        Parameters
        ----------
        cfg : the configuration
            The configuration

        Returns
        -------
        equal : bool
            Whether the configuration is the same. Every key stored in this
            object (derived keys included) must be present in ``cfg`` with an
            equal value; extra keys in ``cfg`` are ignored.
        """
        return all(
            key in cfg and cfg[key] == value
            for key, value in self.cfg_dict.items()
        )